/// <summary>
/// Shows an open-file dialog and, if the user confirms a selection, opens the
/// chosen file in a new <c>HunterEditor</c> window created with the XML text type.
/// Does nothing when the dialog is not confirmed with OK.
/// </summary>
/// <remarks>
/// Exceptions thrown while creating or showing the editor are written to
/// <c>Console</c> instead of being propagated.
/// </remarks>
public void Hunter3EditorOpen()
{
    const string filter = "所有格式 (*.*)|*.*";

    // The dialog wraps a native window handle; dispose it deterministically.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = filter;
        if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        try
        {
            // The editor is shown modelessly and outlives this method, so it is not disposed here.
            new HunterEditor(Console, Config, ofd.FileName, false, filter, false, typeof(StrategyData), null, HunterRichTextBox.TextType.Xml).Show();
        }
        catch (Exception ex)
        {
            Console.WriteException(ex);
        }
    }
}
/// <summary>
/// Downloads the file referenced by <paramref name="uriRes"/>, de-duplicates it by link
/// and by MD5, records it in the local database and, in network mode, moves the file
/// (and its descriptor XML) to the shared remote cache folder.
/// </summary>
/// <param name="uriRes">The resource to download (URL, keyword and page index).</param>
/// <param name="h">The owning hunter; supplies the project info, the current mode and the cancellation flag.</param>
/// <param name="thisThread">The download thread issuing the call. Not used by this method.</param>
/// <returns>
/// <c>null</c> when the link is already known or when an unexpected exception occurs;
/// otherwise a <c>DownloadInfo</c> whose status text is "已下载" (stored),
/// "MD5重复" (content duplicate) or "重复" (download timed out or was cancelled).
/// </returns>
public DownloadInfo DownloadFile(UriResource uriRes, Hunter h, HunterDownloadThread thisThread)
{
    Database db = null;
    if (h.projectInfo.DatabaseHelper != null)
    {
        db = h.projectInfo.DatabaseHelper.GetDatabaseInstance();
    }

    flowCalculator.Interval = 1000;
    // Unsubscribe first so repeated calls do not stack additional copies of the handler on the timer.
    flowCalculator.Elapsed -= new ElapsedEventHandler(flowCalculator_Elapsed);
    flowCalculator.Elapsed += new ElapsedEventHandler(flowCalculator_Elapsed);

    try
    {
        HunterWebClient wc = new HunterWebClient();

        // ---- Step 1: is this link already known? ----
        // Check the local store first.
        bool isExist = database.LinkExists(uriRes.Url);

        // In network mode a link that is new locally must also be checked against the database.
        if (!isExist && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
        {
            if (db == null)
            {
                hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + uriRes.Url));
            }
            else
            {
                bool openFailed = false;
                try
                {
                    db.DbOpen();
                }
                catch (Exception ex)
                {
                    // Remember the failure so the local result is used instead of the database answer.
                    openFailed = true;
                    hunterConsole.WriteException(ex);
                }

                // Skip the query entirely when the connection could not be opened.
                bool queried = !openFailed && db.IsRecordExists("tb_file_infos", out isExist, new FieldValue("file_link", uriRes.Url.Replace("'", "\\'").Replace("\"", "\\\"")));
                if (!queried)
                {
                    // Fall back to the local verdict (not a duplicate).
                    isExist = false;
                }

                try
                {
                    db.DbClose();
                }
                catch (Exception ex)
                {
                    hunterConsole.WriteException(ex);
                }
            }
        }

        if (isExist)
        {
            // Duplicate link: report and skip.
            hunterConsole.ReportAbandonURI(uriRes, "链接重复");
            return null;
        }

        // ---- Step 2: download ----
        try
        {
            hunterConsole.WriteDownload("正在下载文件:" + uriRes.Url);
            hunterConsole.WriteDownload(
                "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                "正在下载的文件:" + Environment.NewLine +
                "下载地址:" + uriRes.Url + Environment.NewLine +
                "下载的关键字:" + strategy.GetKeyword(uriRes.Keyword) + Environment.NewLine +
                "下载的页面页码:" + uriRes.index);
        }
        catch (Exception ex)
        {
            hunterConsole.WriteException(ex);
        }

        wc.DownloadFileCompleted += new System.ComponentModel.AsyncCompletedEventHandler(wc_DownloadFileCompleted); // download-completed event
        wc.DownloadProgressChanged += new DownloadProgressChangedEventHandler(wc_DownloadProgressChanged);          // progress event

        // Pick a suitable destination file name.
        string filepath = HunterUtilities.GetFilenameFromUrl(pInfo, strategy, uriRes);
        int timeout = 0;

        flowCalculator.Start(); // start measuring throughput
        receive1 = 0;           // nothing has been received yet

        wc.DownloadKeyword = strategy.GetKeyword(uriRes.Keyword);
        wc.DownloadSource = uriRes.Url;
        wc.DownloadDestination = filepath;

        if (!Directory.Exists(Path.GetDirectoryName(filepath)))
        {
            Directory.CreateDirectory(Path.GetDirectoryName(filepath));
        }

        wc.DownloadFileAsync(new Uri(uriRes.Url), filepath); // start the download

        bool downloadProblem = false;
        while (wc.IsBusy)
        {
            Thread.Sleep(1000); // poll once per second; 'timeout' counts polls
            timeout++;
            if (timeout >= pInfo.timeout || h.DownloadCancelled)
            {
                // Timed out or cancelled by the user.
                wc.CancelAsync();
                wc.Dispose();
                if (!downloadProblem)
                {
                    // Report only once even if the client stays busy for several polls.
                    hunterConsole.WriteDownload(
                        "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                        "下载超时,取消下载。");
                    try
                    {
                        hunterConsole.ReportAbandonDownloadInfo(
                            new DownloadInfo("", uriRes.Keyword, uriRes.index, uriRes.Url, "", true, "超时", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword)),
                            "超时");
                    }
                    catch (Exception ex)
                    {
                        // Reporting is best-effort, but the failure should not vanish silently.
                        hunterConsole.WriteException(ex);
                    }
                }
                downloadProblem = true;
            }
        }

        flowCalculator.Stop();
        receive1 = 0;
        receive2 = 0;                                   // reset the throughput counters
        hunterConsole.outputSpeedInfo(DateTime.Now, 0); // show zero speed
        wc.Dispose();

        if (!downloadProblem)
        {
            // ---- Step 3: content (MD5) de-duplication ----
            string MD5 = string.Empty;
            try
            {
                MD5 = HunterUtilities.GetMD5Hash(filepath);
            }
            catch (Exception e)
            {
                hunterConsole.WriteException(new Exception("无法获取MD5。"));
                hunterConsole.WriteException(e);
            }

            // Check the local store first.
            bool isDuplicate = database.isDuplicate(uriRes.Url, MD5);

            // In network mode also compare against the database.
            if (!isDuplicate && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
            {
                if (db == null)
                {
                    hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + MD5));
                }
                else
                {
                    bool openFailed = false;
                    try
                    {
                        db.DbOpen();
                    }
                    catch (Exception ex)
                    {
                        openFailed = true;
                        hunterConsole.WriteException(ex);
                    }

                    bool queried = !openFailed && db.IsFileExists("tb_file_infos", out isDuplicate, MD5);
                    if (!queried)
                    {
                        hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + MD5));
                        isDuplicate = false;
                    }

                    try
                    {
                        db.DbClose();
                    }
                    catch (Exception ex)
                    {
                        hunterConsole.WriteException(ex);
                    }
                }
            }

            // ---- Step 4: act on the de-duplication result ----
            if (!isDuplicate)
            {
                // New content: write the descriptor XML and register the file.
                wc.XMLFile = Path.Combine(hunterProject.projectInfo.filefolder, "$__" + Path.GetFileName(wc.DownloadDestination)) + ".xml";
                try
                {
                    HunterUtilities.WriteDownloadFileXML(
                        wc.DownloadSource,
                        wc.DownloadKeyword,
                        Path.GetFileName(wc.DownloadDestination),
                        (hunterProject.projectInfo.search_language == ProjectInfo.Language.none ? null : hunterProject.projectInfo.search_language.ToString()),
                        wc.XMLFile);
                }
                catch (Exception ex)
                {
                    hunterConsole.WriteException(ex);
                }

                database.addNewRecord(uriRes.Url, wc.DownloadKeyword, filepath, MD5);
                DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword, uriRes.index, uriRes.Url, MD5, false, "已下载", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));

                // Network mode: move the downloaded sample to the shared cache folder.
                if (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network)
                {
                    String t_md5 = HunterUtilities.GetMD5Hash(filepath);
                    String filename = t_md5 + "_" + Path.GetFileName(filepath);
                    String combinedPath = Path.Combine(pInfo.share_remote_path, "cache", ProjectInfo.IP_ADDRESS + " (" + pInfo.name + ")");
                    String combinedFullPath = Path.Combine(combinedPath, filename);
                    if (!Directory.Exists(combinedPath))
                    {
                        Directory.CreateDirectory(combinedPath);
                    }

                    bool fileMoveSuccess = false;
                    const int maxMoveCount = 5;
                    int moveCount = 0;

                    try
                    {
                        if (File.Exists(wc.XMLFile))
                        {
                            // Replace any descriptor left over from a previous run.
                            File.Delete(Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                            File.Move(wc.XMLFile, Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                        }
                    }
                    catch (Exception e)
                    {
                        hunterConsole.WriteException(e);
                    }

                    // Retry the move until it succeeds or the failure count exceeds maxMoveCount.
                    while (!fileMoveSuccess)
                    {
                        try
                        {
                            if (moveCount > maxMoveCount)
                            {
                                break;
                            }
                            File.Move(filepath, combinedFullPath);
                            fileMoveSuccess = true;
                        }
                        catch (Exception e)
                        {
                            moveCount++;
                            hunterConsole.WriteException(e);
                        }
                    }
                }

                count++;
                hunterConsole.outputDownloadedFileNum(DateTime.Now, count);
                hunterConsole.ReportDownloadInfo(d);
                return d;
            }
            else
            {
                DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword, uriRes.index, uriRes.Url, MD5, true, "MD5重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));
                hunterConsole.ReportAbandonDownloadInfo(d, "MD5重复");
                return d; // original note: delete the file
            }
        }

        // Download timed out or was cancelled.
        return new DownloadInfo(filepath, uriRes.Keyword, uriRes.index, uriRes.Url, null, true, "重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword)); // original note: delete the file
    }
    catch (Exception e)
    {
        // Placeholder for more specific error handling.
        hunterConsole.WriteException(e);
        return null;
    }
}
/// <summary>
/// Fetches the current search page, extracts every link matched by the strategy's
/// regular expression and enqueues those whose (optionally redirected) URL ends with
/// the configured file type.
/// </summary>
/// <param name="proxy">Proxy to use for the request, or <c>null</c> to connect without a proxy.</param>
/// <param name="main">Main form, passed through to <c>GetPageHtml</c>.</param>
/// <returns>
/// The link texts captured on this page. When the page request fails with a
/// <see cref="WebException"/>, the list contains a single marker entry starting with
/// "{/WebException/}" followed by a random number, so that its Count is greater than zero.
/// </returns>
public List<string> HuntUris(HunterProxy proxy, HunterForm main)
{
    Regex linkReg = null;
    String htmlCode = null;
    List<string> thisURL = new List<string>(); // every link text matched on this page
    bool setupFailed = false;

    try
    {
        linkReg = new Regex(strategy.StrategyData.configuration.Regex); // hyperlink + hyperlink text

        WebProxy webproxy = null;
        if (proxy != null)
        {
            webproxy = new WebProxy(proxy.IPAndPort);
            mHunterConsole.WriteDetails("正在使用代理:" + proxy.IPAndPort + "(" + proxy.Description + ")");
        }

        mHunterConsole.WriteDetails("准备分析页面:" + urlAddress);
        htmlCode = GetPageHtml(webproxy, main);
        mHunterConsole.WriteHTML(htmlCode);
    }
    catch (WebException ex)
    {
        // Request failure: return a non-empty marker result.
        thisURL.Add("{/WebException/}" + new Random().Next().ToString());
        mHunterConsole.WriteDetails("页面" + urlAddress + "请求失败。原因:" + ex.Message);
        mHunterConsole.ReportAbandonURI(new UriResource(urlAddress, strategy.CurrentKeywordProgress, strategy.CurrentSearchProgress, null), ex.Message);
        return thisURL;
    }
    catch (Exception ex)
    {
        setupFailed = true;
        mHunterConsole.WriteException(ex);
    }

    // The failure above is already logged; matching with a missing regex or page
    // would only raise (and log) a second, misleading null-related exception.
    if (setupFailed && (linkReg == null || htmlCode == null))
    {
        return thisURL;
    }

    try
    {
        // Loop-invariant values, computed once instead of once per match.
        Regex tagStripper = new Regex("<(.*?)>");
        bool followRedirects = string.Equals(strategy.StrategyData.configuration.Redirect, "true", StringComparison.OrdinalIgnoreCase);

        Match m = linkReg.Match(htmlCode);
        while (m.Success)
        {
            allCount++;
            mHunterConsole.outputAnalysedUris(DateTime.Now, allCount);

            // Record the link text of every match, including ones rejected below.
            string linkText = m.Result("${text}");
            thisURL.Add(linkText);

            if (strategy.HasForbiddenWord(linkText))
            {
                // Contains a forbidden word: skip this link.
                m = m.NextMatch();
                continue;
            }

            // Strip markup (anything in angle brackets) from the link text.
            linkText = tagStripper.Replace(linkText, "");

            string rawUrl = m.Result("${url}");
            string uri = null;
            try
            {
                uri = followRedirects ? GetTheRedirectUrl(rawUrl) : rawUrl;
            }
            catch (WebException)
            {
                mHunterConsole.WriteDetails("链接" + (rawUrl + "重定向超时。"));
                mHunterConsole.ReportAbandonURI(new UriResource(rawUrl, strategy.CurrentKeywordProgress, strategy.CurrentSearchProgress, null), "重定向超时");
                m = m.NextMatch();
                continue;
            }

            if (uri.EndsWith("." + strategy.Filetype))
            {
                availableCount++;
                mHunterConsole.outputAvailableUris(DateTime.Now, availableCount);

                // Wrap the hit as a resource descriptor.
                UriResource u = new UriResource(uri, strategy.CurrentKeywordProgress, strategy.CurrentSearchProgress, linkText);

                // Avoid enqueueing duplicates, which can occur with multiple threads.
                if (!uriQueue.Contains(u))
                {
                    uriQueue.Enqueue(u);
                }

                mHunterConsole.outputDownloadingUriInfo(DateTime.Now,
                    "找到的资源的URL:" + u.Url + Environment.NewLine +
                    "标题:" + u.Text + Environment.NewLine +
                    "关键字:" + strategy.GetKeyword(u.Keyword) + Environment.NewLine +
                    "搜索页码:" + u.index + Environment.NewLine +
                    "已列入下载队列。");
                mHunterConsole.WriteDetails("正在获得有效URI:" + uri);
            }

            m = m.NextMatch();
        }
    }
    catch (Exception ex)
    {
        mHunterConsole.WriteException(ex);
    }

    return thisURL;
}
// asReflectionObject: true when the editor should treat the text through
// reflection/serialization, false when it acts as a plain text editor.
public HunterEditor(HunterConsole c, HunterConfig config, String LoadFile, bool AsAModel, String fileFilter, bool asReflectionObject, Type objectType, object loadObject, Hunter3.HunterRichTextBox.TextType textType)
{
    // Capture the constructor arguments before the designer components are built.
    this.FileFilter = fileFilter;
    this.AsAModel = AsAModel;
    this.LoadFile = LoadFile;
    this.ObjectType = objectType;
    this.Config = config;
    this.Console = c;
    this.AsReflectionObject = asReflectionObject;
    this.LoadObject = loadObject;

    InitializeComponent();

    // Wire the helper bars to the text box and apply basic appearance settings.
    hSearchBar.Init(hTextBox, tsLabel, HunterConfig.ColorBarForeColor);
    hHTMLGetterBar.Init(hTextBox, HunterConfig.ColorBarForeColor);
    hTextBox.ContentType = textType;
    tsLabel.BackColor = Color.Transparent;
    FormBorderStyle = FormBorderStyle.Sizable;
    MainToolStrip = msMenu;

    // Load the initial document, if one was supplied.
    try
    {
        if (LoadFile != null)
        {
            hTextBox.LoadFile(LoadFile);
            if (!AsAModel)
            {
                Filename = LoadFile;
            }
            else
            {
                // Opened as a template: the content has no saved file of its own yet.
                FileSaved = false;
            }
            ClearDirty();
        }
    }
    catch (Exception ex)
    {
        Console.WriteException(ex);
    }

    if (AsReflectionObject)
    {
        try
        {
            RefreshXML();
            LoadProperty();
        }
        catch (Exception ex)
        {
            Console.WriteException(ex);
        }
    }
    else
    {
        // Plain text mode: hide the second panel of the split container.
        sContainer.Panel2Collapsed = true;
    }

    // Event subscriptions.
    FormClosing += HunterEditor_FormClosing;
    propertyGrid.PropertyValueChanged += propertyGrid_PropertyValueChanged;
    hTextBox.SelectionChanged += hTextBox_SelectionChanged;
    hTextBox.TextChanged += (sender, args) =>
    {
        if (hTextBox.Modified)
        {
            // Mark the document as unsaved and flag it in the caption.
            FileSaved = false;
            Text = CaptionFilename + " *";
        }
        RefreshUI();
        RefreshPropertyGrid();
    };

    RefreshUI();
}
/// <summary>
/// Starts the task: clears the cancellation flag, handles a changed dictionary file,
/// probes the database when the current mode is network (falling back to local mode
/// on failure) and then starts the download threads, the proxy threads (when the
/// configuration enables proxies) and the URI hunting thread.
/// </summary>
/// <remarks>
/// Exceptions raised in either phase are written to the console rather than propagated.
/// </remarks>
public void Start()
{
    DownloadCancelled = false;

    try
    {
        if (projectInfo.mode == ProjectInfo.HunterMode.network)
        {
            projectInfo.CreateIPC();
        }

        // Compare the dictionary's MD5 with the one stored by the previous run.
        string lastMD5 = projectInfo.LoadlastDicMD5(); // MD5 saved last time
        projectInfo.SaveDicMD5();                      // persist the current MD5
        if (lastMD5 != HunterUtilities.GetMD5Hash(projectInfo.dictionary))
        {
            // The dictionary changed. If there is progress to lose, ask whether to reset it.
            if (projectInfo.strategy.CurrentSearchProgress != 0 || projectInfo.strategy.CurrentKeywordProgress != 0)
            {
                DialogResult dr = MessageBox.Show("您的辞典已经更新。要将搜索进度置零,重新开始搜索吗?", "Hunter 3", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
                if (dr == DialogResult.Yes)
                {
                    projectInfo.strategy.CurrentKeywordProgress = 0;
                    projectInfo.strategy.CurrentSearchProgress = 0;
                    projectInfo.strategy.RefreshProgress(0, 0);
                }
            }
        }
    }
    catch (Exception e)
    {
        mHunterConsole.WriteException(e);
    }

    // Decide by mode whether the server connection is needed.
    try
    {
        if (projectInfo.CurrentMode == ProjectInfo.HunterMode.network)
        {
            // Probe the database once.
            try
            {
                projectInfo.DatabaseHelper.database.DbOpen();
                projectInfo.DatabaseHelper.database.DbClose();
            }
            catch (Exception ex)
            {
                // Keep the root cause in the log; the dialog only tells the user about the fallback.
                mHunterConsole.WriteException(ex);
                MessageBox.Show("连接数据库失败,本任务改为本地模式。", "Hunter 3", MessageBoxButtons.OK, MessageBoxIcon.Information);
                projectInfo.CurrentMode = ProjectInfo.HunterMode.local;
            }
        }

        isHuntingUri = true;
        isDownloadingUris = true;
        mHunterConsole.outputStartInformation(DateTime.Now, projectInfo.timeout.ToString());

        for (int i = 0; i < hunterThreads.Length; i++)
        {
            if (projectInfo.CurrentMode != ProjectInfo.HunterMode.local)
            {
                // Each download thread gets its own database helper when not in local mode.
                hunterThreads[i].databaseHelper = new HunterDatabaseHelper(projectInfo);
            }
            hunterThreads[i].downloadThread.Start(hunterThreads[i]);
        }

        if (mHunterConfig.UseProxy)
        {
            for (int i = 0; i < ProxyGetThreads.Length; i++)
            {
                ProxyGetThreads[i].Start();
            }
        }

        thHuntUris.Start();
    }
    catch (Exception e)
    {
        mHunterConsole.WriteException(e);
    }
}
/// <summary>
/// Creates a hunter for a project: reloads the project from disk, reads the proxy list
/// when proxies are enabled, opens the XML database and prepares (but does not start)
/// the download, proxy and URI hunting threads.
/// </summary>
/// <param name="oh">Console used for all messages and exception output.</param>
/// <param name="config">Application configuration (proxy usage and proxy filter keywords).</param>
/// <param name="_pj">Project descriptor; its console and paths are used to reload the project.</param>
/// <param name="main">The main form.</param>
/// <remarks>
/// <c>Error</c> is set to <c>true</c> when the configured thread count is not positive
/// or when the strategy has no keywords. Other exceptions are written to the console.
/// </remarks>
public Hunter(HunterConsole oh, HunterConfig config, ProjectInfo _pj, HunterForm main)
{
    try
    {
        MainForm = main;
        Error = false;
        mHunterConsole = oh;
        projectInfo = _pj;
        mHunterConfig = config;
        ProxyFetcher = new HunterProxyFetcher(AvailableProxies);
        projectInfo = ProjectInfo.LoadProject(_pj.mHunterConsole, _pj.projectPath, _pj.strategyPath, true);

        // Validate the thread count before it is used to size arrays; a negative
        // value would otherwise fail inside the array allocation.
        downloadThreadNum = int.Parse(projectInfo.threadnum);
        if (downloadThreadNum <= 0)
        {
            mHunterConsole.WriteMessage("配置错误:下载线程数不能小于0。");
            Error = true;
            return;
        }

        hunterThreads = new HunterDownloadThread[downloadThreadNum];
        // Threads that look for usable proxies.
        ProxyGetThreads = new Thread[downloadThreadNum];

        if (mHunterConfig.UseProxy)
        {
            // using blocks release the file handle even if reading throws.
            using (FileStream fs = new FileStream("proxy.hip", FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(fs))
            {
                ProxyText = sr.ReadToEnd();
            }
            AllProxies = HunterProxy.GetProxy(ProxyText, mHunterConfig.ProxyFilterKeywords);
        }

        mHunterConsole.WriteMessage(projectInfo.ConfigInformation());
        mHunterConsole.WriteMessage("");
        mHunterConsole.WriteMessage(projectInfo.strategy.GetStrategyInformation());

        xmlDatabase = new XMLDatabase(projectInfo.database, mHunterConsole);
        xmlDatabase.openDatabase();

        for (int i = 0; i < hunterThreads.Length; i++)
        {
            hunterThreads[i] = new HunterDownloadThread();
            hunterThreads[i].downloadThread = new Thread(threadDownloadUris);
        }

        if (mHunterConfig.UseProxy)
        {
            for (int i = 0; i < ProxyGetThreads.Length; i++)
            {
                ProxyGetThreads[i] = new Thread(GetAvaliableProxies);
            }
        }

        if (projectInfo.strategy.Keywords.Count <= 0)
        {
            projectInfo.mHunterConsole.WriteMessage("没有找到关键字,任务取消。");
            Error = true;
            return;
        }

        thHuntUris = new Thread(threadHuntUris);
        thHuntUris.SetApartmentState(ApartmentState.STA);
        hUri = new HunterUri(this);
        projectInfo.strategy.RecordFirstWord();

        mHunterConsole.WriteMessage("下载线程总数:" + hunterThreads.Length);
        mHunterConsole.WriteMessage("读取配置完毕。");
        mHunterConsole.WriteMessage("正在运行任务...");
    }
    catch (Exception e)
    {
        // NOTE(review): Error is left unchanged here, as in the original; confirm whether
        // callers should treat a construction exception as a failed task.
        mHunterConsole.WriteException(e);
    }
}