/// <summary>
/// Downloads the file referenced by <paramref name="entity"/> into a sub-directory of
/// <c>SaveFilePath</c>, creating that sub-directory first.
/// </summary>
/// <remarks>
/// The sub-directory name is <c>entity.file_innerTxt</c> when that is set; otherwise it is
/// built from the two URL segments that precede the file name. Failures while building the
/// directory or downloading are reported through <c>MyMessageBox</c> and the console and are
/// not rethrown.
/// </remarks>
/// <param name="entity">Record whose <c>file_Path</c> is the address to download.</param>
private void CreateDirAndDownLoad(filepath entity)
{
    var path = entity.file_Path;
    if (string.IsNullOrEmpty(path))
    {
        return;
    }

    try
    {
        // Split never returns an empty array, so the last element always exists.
        var segments = path.Split('/');
        var fileName = segments[segments.Length - 1];

        var dirName = entity.file_innerTxt;
        if (string.IsNullOrEmpty(dirName))
        {
            // The fallback name needs the two segments in front of the file name.
            // Shorter paths used to fail with an index error that was silently swallowed.
            if (segments.Length < 3)
            {
                var shortPathMessage = string.Format("路径 {0} 存在错误!文件名 {1} ", path, fileName);
                MyMessageBox.Add(shortPathMessage);
                Console.WriteLine(shortPathMessage);
                return;
            }

            dirName = segments[segments.Length - 3] + @"\" + segments[segments.Length - 2];
        }

        // Strip characters that are not allowed in a directory name. The backslash is in
        // the set, so the two fallback segments end up joined into a single folder name.
        const string illegalDirChars = @"\/*|:?*<> ";
        var safeDirName = new string(dirName.Where(ch => illegalDirChars.IndexOf(ch) < 0).ToArray());

        var root = SaveFilePath.EndsWith(@"\", StringComparison.Ordinal) ? SaveFilePath : SaveFilePath + @"\";
        var createDir = root + safeDirName + @"\";

        try
        {
            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(createDir);
        }
        catch (Exception)
        {
            var createFailedMessage = string.Format("路径 {0} 存在错误!文件名 {1} ", createDir, fileName);
            MyMessageBox.Add(createFailedMessage);
            Console.WriteLine(createFailedMessage);

            // Without the target directory the download cannot succeed; stop here.
            return;
        }

        Tool.DownLoad(path, createDir + fileName);
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (never throw to the caller) but leave a trace.
        var downloadFailedMessage = string.Format("下载 {0} 时发生了错误,错误信息 {1} ", path, ex.Message);
        MyMessageBox.Add(downloadFailedMessage);
        Console.WriteLine(downloadFailedMessage);
    }
}
/// <summary>
/// Loads the page at <paramref name="URL"/>, selects the nodes matching
/// <c>HtmlModelTool.htmlModel.Match</c>, and passes the value of each node's
/// <paramref name="attrName"/> attribute to <c>InsertfilePath</c> unless that value is
/// already present in <c>dirPath</c>.
/// </summary>
/// <param name="attrName">Name of the attribute to read from each matched node, e.g. "src".</param>
/// <param name="URL">Address of the page to load; also passed to <c>InsertfilePath</c> with each new path.</param>
/// <param name="fileType">
/// Type id passed to <c>InsertfilePath</c>; per the original note, 1 = directory and 2 = file address.
/// </param>
/// <returns>
/// <c>true</c> when at least one matched node yielded a non-empty path (new or already
/// known) before the method finished; <c>false</c> when the page could not be loaded, no
/// node matched, or an insert failed before any path had been handled.
/// </returns>
public bool run(string attrName, string URL, int fileType)
{
    HtmlNodeCollection hnCollection;
    HtmlNodeCollection titleCollection;

    try
    {
        HtmlDocument htmlDocument = HtmlTool.LoadHtml(URL);
        hnCollection = HtmlTool.GetNodeCollect(htmlDocument, HtmlModelTool.htmlModel.Match);
        titleCollection = HtmlTool.GetNodeCollect(htmlDocument, "//head/title");
    }
    catch (Exception EX)
    {
        string loadError = string.Format("线程 {0} 获取文件 {1} 时发生了错误,错误信息 {2} ,错误详情 {3} ",
            System.Threading.Thread.CurrentThread.ManagedThreadId, URL, EX.Message, EX.Data);
        MyMessageBox.Add(loadError);
        Console.WriteLine(loadError);
        return false;
    }

    if (hnCollection == null)
    {
        string noNodes = string.Format("线程 {0} 获取文件 {1} 时发生了错误 ,未能加载网页!",
            System.Threading.Thread.CurrentThread.ManagedThreadId, URL);
        MyMessageBox.Add(noNodes);
        Console.WriteLine(noNodes);
        return false;
    }

    bool succeeded = false;
    foreach (HtmlNode hn in hnCollection)
    {
        // A matched node may lack the attribute; the attribute indexer then yields null,
        // which previously caused a NullReferenceException outside any try block.
        string path = hn.Attributes[attrName]?.Value;
        if (string.IsNullOrEmpty(path))
        {
            continue;
        }

        // Label for the entry: inner HTML, else inner text, else the page title, else "".
        string innerTxt = hn.InnerHtml;
        if (string.IsNullOrEmpty(innerTxt))
        {
            innerTxt = hn.InnerText;
        }
        if (string.IsNullOrEmpty(innerTxt))
        {
            innerTxt = (titleCollection != null && titleCollection.Count > 0)
                ? titleCollection[0].InnerHtml
                : "";
        }

        // "READ..." links are stored without their last query parameter. Trim before the
        // duplicate check so the check compares the same form that gets stored, and only
        // trim when there is an '&' (Substring(0, -1) would throw).
        if (path.StartsWith("READ", StringComparison.OrdinalIgnoreCase))
        {
            int loc = path.LastIndexOf('&');
            if (loc >= 0)
            {
                path = path.Substring(0, loc);
            }
        }

        // dirPath is assigned from outside. In this file GetService hands one list to every
        // instance it runs inside Parallel.ForEach, so the check-then-add must be atomic.
        bool isNew;
        lock (dirPath)
        {
            isNew = !dirPath.Contains(path);
            if (isNew)
            {
                dirPath.Add(path);
            }
        }

        if (isNew)
        {
            try
            {
                InsertfilePath(path, innerTxt, fileType, 0, URL);
            }
            catch (Exception ex)
            {
                // Same outcome as before (stop and return the result so far), but reported.
                string insertError = string.Format("线程 {0} 获取文件 {1} 时发生了错误,错误信息 {2} ,错误详情 {3} ",
                    System.Threading.Thread.CurrentThread.ManagedThreadId, path, ex.Message, ex.Data);
                MyMessageBox.Add(insertError);
                Console.WriteLine(insertError);
                return succeeded;
            }
        }

        succeeded = true;
    }

    return succeeded;
}
/// <summary>
/// Reads the stored addresses of the given type and downloads each one in parallel
/// through <c>CreateDirAndDownLoad</c>.
/// </summary>
/// <param name="fileTypeId">
/// File type id passed to <c>Tool.ReadPathByLinq</c> together with status 0
/// (per the original comment, the addresses that have not been downloaded yet).
/// </param>
public void GetService(int fileTypeId)
{
    // Nothing can be saved without an existing target directory.
    bool saveDirUsable = !string.IsNullOrEmpty(SaveFilePath) && Directory.Exists(SaveFilePath);
    if (!saveDirUsable)
    {
        const string warning = "保存地址有误";
        MyMessageBox.Add(warning);
        Console.WriteLine(warning);
        return;
    }

    // The result is kept in a field; the original author noted that a global is best avoided here.
    _filePathList = Tool.ReadPathByLinq(fileTypeId, 0);

    Parallel.ForEach(_filePathList, entity => CreateDirAndDownLoad(entity));
}
/// <summary>
/// Visits the stored parent pages for <paramref name="fileTypeId"/> in parallel and lets
/// <c>GetHtml.run</c> collect the addresses found on each page.
/// </summary>
/// <remarks>
/// File type ids, per the original documentation: 0 = top-level structure,
/// 1 = directory structure, 2 = file structure. Parent pages are read with type
/// <c>fileTypeId - 1</c> and status 3 (per the original comment, entries not yet processed
/// or previously failed). Each visited page is then updated to status 1 when <c>run</c>
/// returns true and to status 2 otherwise.
/// </remarks>
/// <param name="fileTypeId">Type of address to collect on this pass.</param>
public void GetService(int fileTypeId)
{
    if (!Tool.validateHtml(HtmlModelTool.htmlModel.BasePath))
    {
        MyMessageBox.Add("网站基址不是正确的格式");
        return;
    }

    if (String.IsNullOrEmpty(HtmlModelTool.htmlModel.Match) || String.IsNullOrEmpty(HtmlModelTool.htmlModel.AttrName))
    {
        MyMessageBox.Add("匹配字符不可为空");
        Console.WriteLine("匹配字符不可为空");
        return;
    }

    List<string> currentPathList;

    // Same test as "the decimal representation ends in 1", without building a string.
    bool collectingDirectories = Math.Abs(fileTypeId % 10) == 1;
    if (collectingDirectories)
    {
        // Note: a missing configuration value converts to 0, which makes the query below empty.
        int maxDirPath = Convert.ToInt32(ConfigerHelper.GetAppConfig("MaxDirPath"));

        Func<List<string>> readDirectoryPages = () =>
            Tool.ReadPathByLinq(fileTypeId - 1, 3)
                .Where(p => p.file_Path.StartsWith(HtmlModelTool.htmlModel.BasePath))
                .Select(p => p.file_Path)
                .Take(maxDirPath)
                .ToList();

        currentPathList = readDirectoryPages();
        if (currentPathList.Count == 0)
        {
            // Nothing stored yet: create the root entry and look again, exactly once.
            // Re-entering this method instead would recurse without bound (until the stack
            // overflows) whenever the root entry does not show up in the query.
            string netpath = Tool.ConcatHttpPath(HtmlModelTool.htmlModel.BasePath, HtmlModelTool.htmlModel.ExtendPath);
            Tool.CreateRootDir(netpath);

            currentPathList = readDirectoryPages();
            if (currentPathList.Count == 0)
            {
                MyMessageBox.Add("没有查询到可访问的目录!");
                Console.WriteLine("没有查询到可访问的目录!");
                return;
            }
        }
    }
    else
    {
        currentPathList = Tool.ReadPathByLinq(fileTypeId - 1, 3).Select(p => p.file_Path).ToList();
    }

    // Addresses already stored for this type; run() skips anything found in this list.
    List<string> targetPathList = Tool.ReadPathByLinq(fileTypeId, 4).Select(p => p.file_Path).ToList();

    Parallel.ForEach(currentPathList, item =>
    {
        string url = item.StartsWith("HTTP", StringComparison.OrdinalIgnoreCase)
            ? item
            : Tool.ConcatHttpPath(HtmlModelTool.htmlModel.BasePath, item);

        // The reader must be local to the iteration. A variable captured from the enclosing
        // method is shared by all parallel iterations, so one iteration could end up calling
        // run() on an instance another iteration had just assigned.
        var pageReader = new GetHtml()
        {
            dirPath = targetPathList
        };

        if (pageReader.run(HtmlModelTool.htmlModel.AttrName, url, fileTypeId))
        {
            lock (locker)
            {
                Tool.UpdatefilePath(item, fileTypeId - 1, 1);
            }

            string doneMessage = string.Format("线程 {0} 已经完成了文件 {1} 的获取!", Thread.CurrentThread.ManagedThreadId, url);
            MyMessageBox.Add(doneMessage);
            Console.WriteLine(doneMessage);
        }
        else
        {
            lock (locker)
            {
                Tool.UpdatefilePath(item, fileTypeId - 1, 2);
            }

            string failedMessage = string.Format("线程 {0} 对 {1} 的获取失败了!", Thread.CurrentThread.ManagedThreadId, url);
            MyMessageBox.Add(failedMessage);
            Console.WriteLine(failedMessage);
        }
    });
}