Exemplo n.º 1
0
        /// <summary>
        /// Asks the user to pick a file and opens it in a new <c>HunterEditor</c> window as XML text.
        /// </summary>
        /// <remarks>
        /// Nothing happens when the dialog is dismissed without confirming. Any exception raised while
        /// creating or showing the editor is reported through <c>Console.WriteException</c>.
        /// </remarks>
        public void Hunter3EditorOpen()
        {
            String filter = "所有格式 (*.*)|*.*";

            // OpenFileDialog is IDisposable; the editor only needs the chosen path (a string),
            // so the dialog can be released as soon as this method is done with it.
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = filter;
                if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    new HunterEditor(Console, Config, ofd.FileName, false, filter, false, typeof(StrategyData), null, HunterRichTextBox.TextType.Xml).Show();
                }
                catch (Exception ex)
                {
                    Console.WriteException(ex);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the file referenced by <paramref name="uriRes"/> unless its link is already known,
        /// then de-duplicates the downloaded file by MD5 and records it.
        /// </summary>
        /// <remarks>
        /// In network mode the link and the MD5 are additionally checked against the database table
        /// <c>tb_file_infos</c>; if the database is unavailable only the local check is used.
        /// In network mode an accepted file and its XML descriptor are moved into the shared remote cache folder.
        /// </remarks>
        /// <param name="uriRes">Resource to download (URL, keyword index and result-page index).</param>
        /// <param name="h">Owning hunter; supplies the current mode, the database helper and the cancel flag.</param>
        /// <param name="thisThread">Download-thread descriptor of the caller; not used by this method.</param>
        /// <returns>
        /// <c>null</c> when the link is already known or an unexpected exception occurred; otherwise a
        /// <c>DownloadInfo</c> describing the stored file, the MD5 duplicate, or the timed-out/cancelled download.
        /// </returns>
        public DownloadInfo DownloadFile(UriResource uriRes, Hunter h, HunterDownloadThread thisThread)
        {
            Database db = null;

            if (h.projectInfo.DatabaseHelper != null)
            {
                db = h.projectInfo.DatabaseHelper.GetDatabaseInstance();
            }

            flowCalculator.Interval = 1000;
            // Detach before attaching: this method runs once per file, and a bare "+=" would
            // stack one more copy of the handler on every call.
            flowCalculator.Elapsed -= new ElapsedEventHandler(flowCalculator_Elapsed);
            flowCalculator.Elapsed += new ElapsedEventHandler(flowCalculator_Elapsed);
            try
            {
                HunterWebClient wc = new HunterWebClient();

                #region Link duplicate check
                // Check the link against the local store first.
                bool isExist = database.LinkExists(uriRes.Url);

                // Network mode: a link unknown locally must also be checked against the database.
                if (!isExist && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
                {
                    if (db == null)
                    {
                        hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + uriRes.Url));
                    }
                    else
                    {
                        bool openFailed = false;
                        try
                        {
                            db.DbOpen();
                        }
                        catch (Exception ex)
                        {
                            // Remember the failure so the query result below is not trusted.
                            openFailed = true;
                            hunterConsole.WriteException(ex);
                        }
                        // NOTE(review): quotes are escaped by hand here; prefer a parameterized query if Database offers one.
                        bool querySucceeded = db.IsRecordExists("tb_file_infos", out isExist, new FieldValue("file_link", uriRes.Url.Replace("'", "\\'").Replace("\"", "\\\"")));
                        if (!querySucceeded || openFailed)
                        {
                            // Database unusable: fall back to the local result (not a duplicate).
                            isExist = false;
                        }
                        try
                        {
                            db.DbClose();
                        }
                        catch (Exception ex)
                        {
                            hunterConsole.WriteException(ex);
                        }
                    }
                }
                #endregion

                #region Duplicate link: abandon
                if (isExist)
                {
                    // The client is not needed on this path; release it instead of leaking it.
                    wc.Dispose();
                    hunterConsole.ReportAbandonURI(uriRes, "链接重复");
                    return null;
                }
                #endregion

                #region Download
                try
                {
                    hunterConsole.WriteDownload("正在下载文件:" + uriRes.Url);
                    hunterConsole.WriteDownload(
                        "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                        "正在下载的文件:" + Environment.NewLine +
                        "下载地址:" + uriRes.Url + Environment.NewLine +
                        "下载的关键字:" + strategy.GetKeyword(uriRes.Keyword) + Environment.NewLine +
                        "下载的页面页码:" + uriRes.index);
                }
                catch (Exception ex)
                {
                    hunterConsole.WriteException(ex);
                }

                wc.DownloadFileCompleted   += new System.ComponentModel.AsyncCompletedEventHandler(wc_DownloadFileCompleted); // download-completed event
                wc.DownloadProgressChanged += new DownloadProgressChangedEventHandler(wc_DownloadProgressChanged);            // progress-changed event

                // Pick a suitable local file name for the download.
                string filepath = HunterUtilities.GetFilenameFromUrl(pInfo, strategy, uriRes);

                int elapsedSeconds = 0;

                flowCalculator.Start(); // start measuring throughput
                receive1 = 0;           // nothing received yet at the first sample

                wc.DownloadKeyword     = strategy.GetKeyword(uriRes.Keyword);
                wc.DownloadSource      = uriRes.Url;
                wc.DownloadDestination = filepath;
                if (!Directory.Exists(Path.GetDirectoryName(filepath)))
                {
                    Directory.CreateDirectory(Path.GetDirectoryName(filepath));
                }

                wc.DownloadFileAsync(new Uri(uriRes.Url), filepath);    // start the download

                // Poll once per second until the client is idle; cancel on timeout or user cancellation.
                bool downloadProblem = false;
                while (wc.IsBusy)
                {
                    Thread.Sleep(1000);
                    elapsedSeconds++;
                    if (elapsedSeconds >= pInfo.timeout || h.DownloadCancelled)
                    {
                        wc.CancelAsync();
                        wc.Dispose();

                        // Report the abandonment only once, even if the client stays busy for further iterations.
                        if (!downloadProblem)
                        {
                            hunterConsole.WriteDownload(
                                "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                                "下载超时,取消下载。");
                            try
                            {
                                hunterConsole.ReportAbandonDownloadInfo(new DownloadInfo("", uriRes.Keyword, uriRes.index, uriRes.Url, "", true, "超时", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword))
                                                                        , "超时");
                            }
                            catch (Exception ex)
                            {
                                // Reporting is best-effort, but the failure should at least be visible.
                                hunterConsole.WriteException(ex);
                            }
                        }
                        downloadProblem = true;
                    }
                }
                flowCalculator.Stop();
                receive1 = 0;
                receive2 = 0;                                   // clear the throughput counters
                hunterConsole.outputSpeedInfo(DateTime.Now, 0); // reset the displayed speed

                wc.Dispose();
                #endregion

                if (!downloadProblem)
                {
                    string md5 = string.Empty;
                    try
                    {
                        md5 = HunterUtilities.GetMD5Hash(filepath);
                    }
                    catch (Exception e)
                    {
                        hunterConsole.WriteException(new Exception("无法获取MD5。"));
                        hunterConsole.WriteException(e);
                    }

                    #region MD5 duplicate check
                    // Check against the local store first.
                    bool isDuplicate = database.isDuplicate(uriRes.Url, md5);

                    // Network mode: also compare against the database.
                    if (!isDuplicate && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
                    {
                        if (db == null)
                        {
                            hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + md5));
                        }
                        else
                        {
                            bool openFailed = false;
                            try
                            {
                                db.DbOpen();
                            }
                            catch (Exception ex)
                            {
                                // Remember the failure so the query result below is not trusted.
                                openFailed = true;
                                hunterConsole.WriteException(ex);
                            }
                            bool querySucceeded = db.IsFileExists("tb_file_infos", out isDuplicate, md5);
                            if (!querySucceeded || openFailed)
                            {
                                hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + md5));
                                isDuplicate = false;
                            }
                            try
                            {
                                db.DbClose();
                            }
                            catch (Exception ex)
                            {
                                hunterConsole.WriteException(ex);
                            }
                        }
                    }
                    #endregion

                    #region Actions for unique / duplicate files
                    if (!isDuplicate)
                    {
                        // Unique file: write its XML descriptor and record it.
                        wc.XMLFile = Path.Combine(hunterProject.projectInfo.filefolder, "$__" + Path.GetFileName(wc.DownloadDestination)) + ".xml";
                        try
                        {
                            HunterUtilities.WriteDownloadFileXML(wc.DownloadSource, wc.DownloadKeyword, Path.GetFileName(wc.DownloadDestination), (hunterProject.projectInfo.search_language == ProjectInfo.Language.none ? null : hunterProject.projectInfo.search_language.ToString()),
                                                                 wc.XMLFile);
                        }
                        catch (Exception ex)
                        {
                            hunterConsole.WriteException(ex);
                        }
                        database.addNewRecord(uriRes.Url, wc.DownloadKeyword, filepath, md5);
                        DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword, uriRes.index, uriRes.Url, md5, false, "已下载", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));

                        #region Network mode: upload the sample to the shared cache
                        if (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network)
                        {
                            // The hash is recomputed here on purpose: if hashing failed above, this call throws
                            // and the outer handler turns that into a null result, as before.
                            String t_md5            = HunterUtilities.GetMD5Hash(filepath);
                            String filename         = t_md5 + "_" + Path.GetFileName(filepath);
                            String combinedPath     = Path.Combine(pInfo.share_remote_path, "cache", ProjectInfo.IP_ADDRESS + " (" + pInfo.name + ")");
                            String combinedFullPath = Path.Combine(combinedPath, filename);
                            if (!Directory.Exists(combinedPath))
                            {
                                Directory.CreateDirectory(combinedPath);
                            }

                            bool      fileMoveSuccess = false;
                            const int maxMoveCount    = 5;
                            int       moveCount       = 0;

                            // Move the XML descriptor, replacing any stale copy at the destination.
                            try
                            {
                                if (File.Exists(wc.XMLFile))
                                {
                                    File.Delete(Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                                    File.Move(wc.XMLFile, Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                                }
                            }
                            catch (Exception e)
                            {
                                hunterConsole.WriteException(e);
                            }

                            // Move the downloaded file, retrying until the failure count exceeds maxMoveCount.
                            while (!fileMoveSuccess)
                            {
                                try
                                {
                                    if (moveCount > maxMoveCount)
                                    {
                                        break;
                                    }
                                    File.Move(filepath, combinedFullPath);
                                    fileMoveSuccess = true;
                                }
                                catch (Exception e)
                                {
                                    moveCount++;
                                    hunterConsole.WriteException(e);
                                }
                            }
                        }
                        #endregion

                        count++;
                        hunterConsole.outputDownloadedFileNum(DateTime.Now, count);
                        hunterConsole.ReportDownloadInfo(d);
                        return d;
                    }
                    else
                    {
                        // Same content already stored: report it as abandoned.
                        DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword,
                                                          uriRes.index, uriRes.Url, md5, true, "MD5重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));
                        hunterConsole.ReportAbandonDownloadInfo(d, "MD5重复");
                        return d;
                    }
                    #endregion
                }

                // Reached only after a timeout or cancellation.
                // NOTE(review): the status text here says "duplicate" although the cause is a timeout/cancel — confirm whether callers rely on it.
                return new DownloadInfo(filepath, uriRes.Keyword, uriRes.index,
                                        uriRes.Url, null, true, "重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));
            }
            catch (Exception e)
            {
                // Placeholder for dedicated error handling; for now the failure is logged and the download skipped.
                hunterConsole.WriteException(e);
                return null;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Fetches the current search page, extracts every link matched by the strategy's regular expression
        /// and enqueues those whose target ends with the configured file type.
        /// </summary>
        /// <param name="proxy">Proxy to route the page request through, or <c>null</c> for a direct request.</param>
        /// <param name="main">Main form, passed through to <c>GetPageHtml</c>.</param>
        /// <returns>
        /// The link texts matched on this page (including ones that were later skipped). When the page request
        /// fails with a <see cref="WebException"/>, the list holds a single random marker entry so that its
        /// count is greater than zero. When preparing or fetching the page fails with any other exception, the list is empty.
        /// </returns>
        public List <string> HuntUris(HunterProxy proxy, HunterForm main)
        {
            Regex         linkReg    = null;
            String        htmlCode   = null;
            List <string> thisURL    = new List <string>(); // every link text matched on this page
            bool          pageLoaded = false;

            try
            {
                linkReg = new Regex(strategy.StrategyData.configuration.Regex);    // hyperlink + hyperlink text

                WebProxy webproxy = null;
                if (proxy != null)
                {
                    webproxy = new WebProxy(proxy.IPAndPort);
                    mHunterConsole.WriteDetails("正在使用代理:" + proxy.IPAndPort + "(" + proxy.Description + ")");
                }

                mHunterConsole.WriteDetails("准备分析页面:" + urlAddress);
                htmlCode = GetPageHtml(webproxy, main);
                mHunterConsole.WriteHTML(htmlCode);
                pageLoaded = true;
            }
            catch (WebException ex)    // request failed/timed out: return a random result with Count > 0
            {
                thisURL.Add("{/WebException/}" + new Random().Next().ToString());

                mHunterConsole.WriteDetails("页面" + urlAddress + "请求失败。原因:" + ex.Message);
                mHunterConsole.ReportAbandonURI(new UriResource(urlAddress, strategy.CurrentKeywordProgress,
                                                                strategy.CurrentSearchProgress, null), ex.Message);
                return thisURL;
            }
            catch (Exception ex)
            {
                mHunterConsole.WriteException(ex);
            }

            if (!pageLoaded)
            {
                // The failure is already logged above; parsing with missing state would only
                // raise and log a second, misleading exception.
                return thisURL;
            }

            try
            {
                // Strips angle-bracket tags from link texts; constant pattern, so build it once.
                Regex tagReg = new Regex("<(.*?)>");

                for (Match m = linkReg.Match(htmlCode); m.Success; m = m.NextMatch())
                {
                    allCount++;
                    mHunterConsole.outputAnalysedUris(DateTime.Now, allCount);

                    string linkText = m.Result("${text}");

                    thisURL.Add(linkText);   // remember the raw link text of this match

                    if (strategy.HasForbiddenWord(linkText))
                    {
                        continue;    // contains a forbidden word: skip this link
                    }

                    linkText = tagReg.Replace(linkText, "");

                    string rawUrl = m.Result("${url}");
                    string uri;
                    try
                    {
                        // Ordinal, case-insensitive comparison of the setting; an unset value means "no redirect".
                        bool followRedirect = string.Equals(strategy.StrategyData.configuration.Redirect, "true", StringComparison.OrdinalIgnoreCase);
                        uri = followRedirect ? GetTheRedirectUrl(rawUrl) : rawUrl;
                    }
                    catch (WebException)
                    {
                        mHunterConsole.WriteDetails("链接" + (rawUrl + "重定向超时。"));
                        mHunterConsole.ReportAbandonURI(new UriResource(rawUrl, strategy.CurrentKeywordProgress,
                                                                        strategy.CurrentSearchProgress, null), "重定向超时");
                        continue;
                    }

                    // URLs are machine data: compare the extension ordinally (still case-sensitive).
                    if (uri.EndsWith("." + strategy.Filetype, StringComparison.Ordinal))
                    {
                        availableCount++;
                        mHunterConsole.outputAvailableUris(DateTime.Now, availableCount);

                        UriResource u = new UriResource(uri, strategy.CurrentKeywordProgress
                                                        , strategy.CurrentSearchProgress, linkText);

                        if (!uriQueue.Contains(u))    // the same item may already have been queued by another thread
                        {
                            uriQueue.Enqueue(u);
                        }
                        mHunterConsole.outputDownloadingUriInfo(DateTime.Now,
                                                                "找到的资源的URL:" + u.Url + Environment.NewLine +
                                                                "标题:" + u.Text + Environment.NewLine +
                                                                "关键字:" + strategy.GetKeyword(u.Keyword) + Environment.NewLine +
                                                                "搜索页码:" + u.index + Environment.NewLine +
                                                                "已列入下载队列。");

                        mHunterConsole.WriteDetails("正在获得有效URI:" + uri);
                    }
                }
            }
            catch (Exception ex)
            {
                mHunterConsole.WriteException(ex);
            }

            return thisURL;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates an editor window, optionally loading a file into it and wiring up its UI events.
        /// </summary>
        /// <param name="c">Console used to report exceptions raised while loading.</param>
        /// <param name="config">Configuration stored on the editor.</param>
        /// <param name="LoadFile">Path of the file to load into the text box, or <c>null</c> to start empty.</param>
        /// <param name="AsAModel">
        /// When <c>true</c> the loaded file is flagged as unsaved instead of becoming the editor's file name.
        /// </param>
        /// <param name="fileFilter">File-dialog filter string stored on the editor.</param>
        /// <param name="asReflectionObject">
        /// Whether the editor handles the text through reflection/serialization (<c>true</c>) or as a plain
        /// text editor (<c>false</c>). When <c>false</c> the second panel of the split container is collapsed.
        /// </param>
        /// <param name="objectType">Type stored in <c>ObjectType</c>.</param>
        /// <param name="loadObject">Object stored in <c>LoadObject</c>.</param>
        /// <param name="textType">Content type assigned to the text box.</param>
        public HunterEditor(HunterConsole c, HunterConfig config, String LoadFile, bool AsAModel, String fileFilter, bool asReflectionObject, Type objectType, object loadObject, Hunter3.HunterRichTextBox.TextType textType)
        {
            // Capture the constructor arguments before the designer-generated controls are built.
            FileFilter = fileFilter;
            this.AsAModel = AsAModel;
            this.LoadFile = LoadFile;
            ObjectType = objectType;
            Config = config;
            Console = c;
            this.AsReflectionObject = asReflectionObject;
            LoadObject = loadObject;
            InitializeComponent();

            // Basic control setup.
            hSearchBar.Init(hTextBox, tsLabel, HunterConfig.ColorBarForeColor);
            hHTMLGetterBar.Init(hTextBox, HunterConfig.ColorBarForeColor);
            hTextBox.ContentType = textType;
            tsLabel.BackColor = Color.Transparent;
            FormBorderStyle = FormBorderStyle.Sizable;
            MainToolStrip = msMenu;

            // Load the initial document, if one was given.
            if (LoadFile != null)
            {
                try
                {
                    hTextBox.LoadFile(LoadFile);
                    if (!AsAModel)
                    {
                        Filename = LoadFile;
                    }
                    else
                    {
                        FileSaved = false;
                    }

                    ClearDirty();
                }
                catch (Exception loadError)
                {
                    Console.WriteException(loadError);
                }
            }

            // Reflection mode fills the XML/property views; plain mode hides the second panel.
            if (AsReflectionObject)
            {
                try
                {
                    RefreshXML();
                    LoadProperty();
                }
                catch (Exception refreshError)
                {
                    Console.WriteException(refreshError);
                }
            }
            else
            {
                sContainer.Panel2Collapsed = true;
            }

            // Events are attached only after the initial load above has finished.
            FormClosing += HunterEditor_FormClosing;
            propertyGrid.PropertyValueChanged += propertyGrid_PropertyValueChanged;
            hTextBox.SelectionChanged += hTextBox_SelectionChanged;
            hTextBox.TextChanged += (sender, args) =>
            {
                // A modified buffer marks the document unsaved and stars the caption.
                if (hTextBox.Modified)
                {
                    FileSaved = false;
                    Text = CaptionFilename + " *";
                }
                RefreshUI();
                RefreshPropertyGrid();
            };
            RefreshUI();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Starts the task: checks whether the dictionary changed since the last run, verifies the database
        /// connection in network mode, then starts the download, proxy and URI-hunting threads.
        /// </summary>
        /// <remarks>
        /// Exceptions in either phase are reported through <c>mHunterConsole.WriteException</c> and do not propagate.
        /// </remarks>
        public void Start()
        {
            DownloadCancelled = false;

            // Phase 1: IPC setup and dictionary-change detection.
            try
            {
                // NOTE(review): this reads projectInfo.mode while the rest of the method reads CurrentMode — confirm that is intended.
                if (projectInfo.mode == ProjectInfo.HunterMode.network)
                {
                    projectInfo.CreateIPC();
                }

                // Read the dictionary MD5 saved by the previous run, then store the current one.
                string previousDicMD5 = projectInfo.LoadlastDicMD5();
                projectInfo.SaveDicMD5();

                bool dictionaryChanged = previousDicMD5 != HunterUtilities.GetMD5Hash(projectInfo.dictionary);

                // A changed dictionary with non-zero progress: ask whether to restart the search from zero.
                if (dictionaryChanged &&
                    (projectInfo.strategy.CurrentSearchProgress != 0 || projectInfo.strategy.CurrentKeywordProgress != 0))
                {
                    DialogResult answer =
                        MessageBox.Show("您的辞典已经更新。要将搜索进度置零,重新开始搜索吗?", "Hunter 3", MessageBoxButtons.YesNo, MessageBoxIcon.Question);

                    if (answer == DialogResult.Yes)
                    {
                        projectInfo.strategy.CurrentKeywordProgress = 0;
                        projectInfo.strategy.CurrentSearchProgress = 0;
                        projectInfo.strategy.RefreshProgress(0, 0);
                    }
                }
            }
            catch (Exception e)
            {
                mHunterConsole.WriteException(e);
            }

            // Phase 2: the mode decides whether the server is contacted; then the threads are started.
            try
            {
                if (projectInfo.CurrentMode == ProjectInfo.HunterMode.network)
                {
                    // Probe the database once; fall back to local mode if it cannot be opened and closed.
                    try
                    {
                        projectInfo.DatabaseHelper.database.DbOpen();
                        projectInfo.DatabaseHelper.database.DbClose();
                    }
                    catch
                    {
                        MessageBox.Show("连接数据库失败,本任务改为本地模式。", "Hunter 3", MessageBoxButtons.OK, MessageBoxIcon.Information);
                        projectInfo.CurrentMode = ProjectInfo.HunterMode.local;
                    }
                }

                isHuntingUri = true;
                isDownloadingUris = true;

                mHunterConsole.outputStartInformation(DateTime.Now, projectInfo.timeout.ToString());

                // Each download thread gets its own database helper unless the task runs locally.
                for (int index = 0; index < hunterThreads.Length; index++)
                {
                    if (projectInfo.CurrentMode != ProjectInfo.HunterMode.local)
                    {
                        hunterThreads[index].databaseHelper = new HunterDatabaseHelper(projectInfo);
                    }
                    hunterThreads[index].downloadThread.Start(hunterThreads[index]);
                }

                if (mHunterConfig.UseProxy)
                {
                    foreach (Thread proxyThread in ProxyGetThreads)
                    {
                        proxyThread.Start();
                    }
                }

                thHuntUris.Start();
            }
            catch (Exception e)
            {
                mHunterConsole.WriteException(e);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Builds a hunter for the given project: reloads the project from disk, optionally reads the proxy
        /// list, opens the XML database and creates (but does not start) the worker threads.
        /// </summary>
        /// <remarks>
        /// Construction never throws to the caller: failures are written to the console and <c>Error</c> is
        /// set to <c>true</c>, so callers should check <c>Error</c> before calling <c>Start</c>.
        /// </remarks>
        /// <param name="oh">Console that receives messages and exceptions.</param>
        /// <param name="config">Application configuration (proxy usage and proxy filter keywords are read here).</param>
        /// <param name="_pj">Project descriptor; its console and paths are used to reload the project.</param>
        /// <param name="main">Main form, stored in <c>MainForm</c>.</param>
        public Hunter(HunterConsole oh, HunterConfig config, ProjectInfo _pj, HunterForm main)
        {
            try
            {
                MainForm       = main;
                Error          = false;
                mHunterConsole = oh;
                projectInfo    = _pj;    // stays in effect only if the reload below throws
                mHunterConfig  = config;
                ProxyFetcher   = new HunterProxyFetcher(AvailableProxies);
                projectInfo    = ProjectInfo.LoadProject(_pj.mHunterConsole, _pj.projectPath, _pj.strategyPath, true);

                downloadThreadNum = int.Parse(projectInfo.threadnum);

                // Validate before sizing the arrays: a negative count would otherwise throw on allocation
                // and hide the configuration message.
                if (downloadThreadNum <= 0)
                {
                    mHunterConsole.WriteMessage("配置错误:下载线程数不能小于0。");
                    Error = true;
                    return;
                }

                hunterThreads = new HunterDownloadThread[downloadThreadNum];

                // Threads that fetch proxies.
                ProxyGetThreads = new Thread[downloadThreadNum];

                if (mHunterConfig.UseProxy)
                {
                    // "using" closes the file even when reading fails.
                    using (FileStream fs = new FileStream("proxy.hip", FileMode.Open, FileAccess.Read))
                    using (StreamReader sr = new StreamReader(fs))
                    {
                        ProxyText = sr.ReadToEnd();
                    }
                    AllProxies = HunterProxy.GetProxy(ProxyText, mHunterConfig.ProxyFilterKeywords);
                }

                mHunterConsole.WriteMessage(projectInfo.ConfigInformation());
                mHunterConsole.WriteMessage("");
                mHunterConsole.WriteMessage(projectInfo.strategy.GetStrategyInformation());

                xmlDatabase = new XMLDatabase(projectInfo.database, mHunterConsole);
                xmlDatabase.openDatabase();

                for (int i = 0; i < hunterThreads.Length; i++)
                {
                    hunterThreads[i] = new HunterDownloadThread();
                    hunterThreads[i].downloadThread = new Thread(threadDownloadUris);
                }

                if (mHunterConfig.UseProxy)
                {
                    for (int i = 0; i < ProxyGetThreads.Length; i++)
                    {
                        ProxyGetThreads[i] = new Thread(GetAvaliableProxies);
                    }
                }

                if (projectInfo.strategy.Keywords.Count <= 0)
                {
                    projectInfo.mHunterConsole.WriteMessage("没有找到关键字,任务取消。");
                    Error = true;
                    return;
                }

                thHuntUris = new Thread(threadHuntUris);
                thHuntUris.SetApartmentState(ApartmentState.STA);

                hUri = new HunterUri(this);
                projectInfo.strategy.RecordFirstWord();

                mHunterConsole.WriteMessage("下载线程总数:" + hunterThreads.Length);
                mHunterConsole.WriteMessage("读取配置完毕。");
                mHunterConsole.WriteMessage("正在运行任务...");
            }
            catch (Exception e)
            {
                // The instance is only partially initialised at this point; flag it so callers do not start it.
                Error = true;
                mHunterConsole.WriteException(e);
            }
        }