예제 #1
0
        /// <summary>
        /// Downloads the file referenced by <paramref name="entity"/> into a sub-directory of
        /// <c>SaveFilePath</c>, creating that sub-directory first when it does not exist yet.
        /// </summary>
        /// <remarks>
        /// The sub-directory name is <c>entity.file_innerTxt</c> when that is non-empty; otherwise it is
        /// built from the two '/'-separated segments that precede the file name in <c>entity.file_Path</c>.
        /// Every failure is reported through <c>MyMessageBox</c> and the console and is never rethrown,
        /// so a single bad entry does not abort the caller's loop.
        /// </remarks>
        /// <param name="entity">Record whose <c>file_Path</c> is the address to download.</param>
        private void CreateDirAndDownLoad(filepath entity)
        {
            var path = entity.file_Path;
            if (string.IsNullOrEmpty(path))
            {
                return;
            }

            // Characters that are stripped from the directory name.
            const string illegalChars = @"\/*|:?*<> ";
            var fileName = string.Empty;

            try
            {
                // Split never returns an empty array, so the last element always exists.
                var fileNames = path.Split('/');
                fileName = fileNames[fileNames.Length - 1];

                var dealString = entity.file_innerTxt;
                if (string.IsNullOrEmpty(dealString))
                {
                    // The fallback name needs the two segments in front of the file name.
                    if (fileNames.Length < 3)
                    {
                        var badPath = string.Format("路径 {0} 存在错误!文件名 {1} ", path, fileName);
                        MyMessageBox.Add(badPath);
                        Console.WriteLine(badPath);
                        return;
                    }

                    // Note: the filter below also removes this '\', so both segments end up in a
                    // single directory name. Kept as-is so existing download folders stay valid.
                    dealString = fileNames[fileNames.Length - 3] + @"\" + fileNames[fileNames.Length - 2];
                }

                // Remove characters that are not allowed in the directory name.
                dealString = new string(dealString.Where(ch => illegalChars.IndexOf(ch) < 0).ToArray());

                var createDir =
                    (SaveFilePath.EndsWith(@"\") ? SaveFilePath : SaveFilePath + @"\")
                    + dealString + @"\";

                if (!Directory.Exists(createDir))
                {
                    try
                    {
                        Directory.CreateDirectory(createDir);
                    }
                    catch (Exception)
                    {
                        MyMessageBox.Add(string.Format("路径 {0} 存在错误!文件名 {1} ", createDir, fileName));
                        Console.WriteLine("路径 {0} 存在错误!文件名 {1} ", createDir, fileName);
                        // Without the target directory the download cannot succeed; stop here.
                        return;
                    }
                }

                Tool.DownLoad(path, createDir + fileName);
            }
            catch (Exception ex)
            {
                // Best effort: report the failure instead of silently dropping it.
                var failure = string.Format("下载文件 {0} 时发生了错误,错误信息 {1} ", path, ex.Message);
                MyMessageBox.Add(failure);
                Console.WriteLine(failure);
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the page at <paramref name="URL"/>, selects the nodes matched by
        /// <c>HtmlModelTool.htmlModel.Match</c> and hands the value of attribute
        /// <paramref name="attrName"/> of each node to <c>InsertfilePath</c>, skipping values that are
        /// already contained in <c>dirPath</c>.
        /// </summary>
        /// <param name="attrName">Name of the attribute to read from each matched node, e.g. "src" or "img".</param>
        /// <param name="URL">Address of the page to load; also passed on to <c>InsertfilePath</c>.</param>
        /// <param name="fileType">Type stored with each record: 1 = directory, 2 = file address.</param>
        /// <returns>
        /// <c>true</c> when at least one node yielded a non-empty attribute value (new or already known);
        /// <c>false</c> when the page could not be loaded, nothing matched, or no value was found.
        /// If <c>InsertfilePath</c> throws, processing stops and the result accumulated so far is returned.
        /// </returns>
        public bool run(string attrName, string URL, int fileType)
        {
            bool succeeded = false;
            HtmlNodeCollection hnCollection;
            HtmlNodeCollection titleCollection;

            // Load the page and select the candidate nodes.
            try
            {
                HtmlDocument htmlDocument = HtmlTool.LoadHtml(URL);
                hnCollection    = HtmlTool.GetNodeCollect(htmlDocument, HtmlModelTool.htmlModel.Match);
                titleCollection = HtmlTool.GetNodeCollect(htmlDocument, "//head/title");
            }
            catch (Exception EX)
            {
                MyMessageBox.Add(string.Format("线程 {0} 获取文件 {1} 时发生了错误,错误信息 {2} ,错误详情 {3} ", System.Threading.Thread.CurrentThread.ManagedThreadId, URL, EX.Message, EX.Data));
                Console.WriteLine("线程 {0} 获取文件 {1} 时发生了错误,错误信息 {2} ,错误详情 {3} ", System.Threading.Thread.CurrentThread.ManagedThreadId, URL, EX.Message, EX.Data);

                return succeeded;
            }

            if (hnCollection == null)
            {
                MyMessageBox.Add(string.Format("线程 {0} 获取文件 {1} 时发生了错误 ,未能加载网页!", System.Threading.Thread.CurrentThread.ManagedThreadId, URL));
                Console.WriteLine("线程 {0} 获取文件 {1} 时发生了错误 ,未能加载网页!", System.Threading.Thread.CurrentThread.ManagedThreadId, URL);

                return succeeded;
            }

            // Last-resort label for nodes that have no content of their own: the page title.
            string pageTitle = (titleCollection != null && titleCollection.Count > 0)
                ? titleCollection[0].InnerHtml
                : "";

            // NOTE(review): the same list instance may be shared by several GetHtml objects that run
            // in parallel, so the check-and-add below is serialised on the list itself — confirm
            // against the callers.
            var knownPaths = dirPath;

            foreach (HtmlNode hn in hnCollection)
            {
                // A matched node may lack the requested attribute; the indexer then yields null.
                var attribute = hn.Attributes[attrName];
                if (attribute == null)
                {
                    continue;
                }

                string path = attribute.Value;
                if (string.IsNullOrEmpty(path))
                {
                    continue;
                }

                // Label preference: inner HTML, then inner text, then the page title.
                string innerTxt = hn.InnerHtml;
                if (string.IsNullOrEmpty(innerTxt))
                {
                    innerTxt = !string.IsNullOrEmpty(hn.InnerText) ? hn.InnerText : pageTitle;
                }

                // "read..." links are stored without their last query parameter. Normalise before
                // the dedupe check so the key matches the value handed to InsertfilePath.
                if (path.StartsWith("READ", StringComparison.OrdinalIgnoreCase))
                {
                    int loc = path.LastIndexOf('&');
                    if (loc >= 0)
                    {
                        path = path.Substring(0, loc);
                    }
                }

                bool isNew;
                lock (knownPaths)
                {
                    isNew = !knownPaths.Contains(path);
                    if (isNew)
                    {
                        knownPaths.Add(path);
                    }
                }

                if (isNew)
                {
                    try
                    {
                        InsertfilePath(path, innerTxt, fileType, 0, URL);
                    }
                    catch (Exception ex)
                    {
                        // Same outcome as before (stop and return the current result), but the
                        // failure is now reported instead of being swallowed.
                        string failure = string.Format("线程 {0} 保存地址 {1} 时发生了错误,错误信息 {2} ", System.Threading.Thread.CurrentThread.ManagedThreadId, path, ex.Message);
                        MyMessageBox.Add(failure);
                        Console.WriteLine(failure);

                        return succeeded;
                    }
                }

                succeeded = true;
            }

            return succeeded;
        }
예제 #3
0
        /// <summary>
        /// Reads the addresses of the given file type via <c>Tool.ReadPathByLinq(fileTypeId, 0)</c>
        /// and runs <c>CreateDirAndDownLoad</c> for each of them in parallel.
        /// </summary>
        /// <remarks>
        /// Nothing is done (apart from reporting the problem) when <c>SaveFilePath</c> is empty or
        /// does not point to an existing directory.
        /// </remarks>
        /// <param name="fileTypeId">File type whose addresses are read and downloaded.</param>
        public void GetService(int fileTypeId)
        {
            bool saveDirUsable = !string.IsNullOrEmpty(SaveFilePath) && Directory.Exists(SaveFilePath);
            if (!saveDirUsable)
            {
                const string invalidSavePath = "保存地址有误";
                MyMessageBox.Add(invalidSavePath);
                Console.WriteLine(invalidSavePath);
                return;
            }

            // Addresses that have not been downloaded yet.
            // The list is kept in a field; the original author noted that a field is best avoided here.
            _filePathList = Tool.ReadPathByLinq(fileTypeId, 0);

            Parallel.ForEach(_filePathList, entity => CreateDirAndDownLoad(entity));
        }
예제 #4
0
        /// <summary>
        /// Crawls the pending addresses of the parent level (<paramref name="fileTypeId"/> - 1) and
        /// lets <c>GetHtml.run</c> collect the addresses found on each page as entries of type
        /// <paramref name="fileTypeId"/>.
        /// </summary>
        /// <remarks>
        /// File type ids: 0 = top-level structure, 1 = directory structure, 2 = file structure.
        /// Each crawled address is afterwards updated through <c>Tool.UpdatefilePath</c> with status 1
        /// when <c>run</c> returned <c>true</c> and status 2 otherwise.
        /// Validation problems are reported through <c>MyMessageBox</c> and end the call early.
        /// </remarks>
        /// <param name="fileTypeId">File type to collect on this run.</param>
        public void GetService(int fileTypeId)
        {
            if (!Tool.validateHtml(HtmlModelTool.htmlModel.BasePath))
            {
                MyMessageBox.Add("网站基址不是正确的格式");
                return;
            }
            if (String.IsNullOrEmpty(HtmlModelTool.htmlModel.Match) || String.IsNullOrEmpty(HtmlModelTool.htmlModel.AttrName))
            {
                MyMessageBox.Add("匹配字符不可为空");
                Console.WriteLine("匹配字符不可为空");
                return;
            }

            // Addresses still to be crawled (status 3 on the parent level).
            List<string> currentPathList;
            if (fileTypeId.ToString().EndsWith("1"))
            {
                // Directory level: only addresses below the configured base path, capped by MaxDirPath.
                string maxDirSetting = ConfigerHelper.GetAppConfig("MaxDirPath");
                int maxDirPath = Convert.ToInt32(maxDirSetting);
                Func<List<string>> readDirPaths = () =>
                    Tool.ReadPathByLinq(fileTypeId - 1, 3)
                        .Where(p => p.file_Path.StartsWith(HtmlModelTool.htmlModel.BasePath))
                        .Select(p => p.file_Path)
                        .Take(maxDirPath)
                        .ToList();

                currentPathList = readDirPaths();
                if (currentPathList.Count == 0)
                {
                    // Nothing to crawl yet: create the root entry and query once more. The previous
                    // version called itself here, which never terminated when the list stayed
                    // empty (for example MaxDirPath <= 0) and ended in a stack overflow.
                    string netpath = Tool.ConcatHttpPath(HtmlModelTool.htmlModel.BasePath, HtmlModelTool.htmlModel.ExtendPath);
                    Tool.CreateRootDir(netpath);

                    currentPathList = readDirPaths();
                    if (currentPathList.Count == 0)
                    {
                        MyMessageBox.Add("没有查询到可访问的目录!");
                        Console.WriteLine("没有查询到可访问的目录!");
                        return;
                    }
                }
            }
            else
            {
                // File level: every pending address of the parent level.
                currentPathList = Tool.ReadPathByLinq(fileTypeId - 1, 3).Select(p => p.file_Path).ToList();
            }

            // Addresses of the target type that are already stored; used to filter out duplicates.
            List<string> targetPathList = Tool.ReadPathByLinq(fileTypeId, 4).Select(p => p.file_Path).ToList();

            Parallel.ForEach(currentPathList, item =>
            {
                // Relative addresses are resolved against the configured base path.
                string url = item.StartsWith("HTTP", StringComparison.OrdinalIgnoreCase)
                    ? item
                    : Tool.ConcatHttpPath(HtmlModelTool.htmlModel.BasePath, item);

                // One crawler per iteration, held in a local: assigning to a variable captured from
                // the enclosing method would let parallel iterations overwrite each other's instance.
                // NOTE(review): all crawlers share targetPathList as their known-address list —
                // confirm that GetHtml.run guards its own changes to that list.
                GetHtml crawler = new GetHtml()
                {
                    dirPath = targetPathList
                };

                if (crawler.run(HtmlModelTool.htmlModel.AttrName, url, fileTypeId))
                {
                    lock (locker)
                    {
                        Tool.UpdatefilePath(item, fileTypeId - 1, 1);
                    }

                    string done = string.Format("线程 {0} 已经完成了文件 {1} 的获取!", Thread.CurrentThread.ManagedThreadId, url);
                    MyMessageBox.Add(done);
                    Console.WriteLine(done);
                }
                else
                {
                    lock (locker)
                    {
                        Tool.UpdatefilePath(item, fileTypeId - 1, 2);
                    }

                    string failed = string.Format("线程 {0} 对 {1} 的获取失败了!", Thread.CurrentThread.ManagedThreadId, url);
                    MyMessageBox.Add(failed);
                    Console.WriteLine(failed);
                }
            });
        }