/// <summary>
/// Loads the list page at <paramref name="url"/>, walks every <c>li</c> under the
/// <c>div</c> with class <c>wrapper-piclist</c> and downloads the video linked by the
/// first anchor of each item into a folder named after today's date.
/// </summary>
/// <remarks>
/// Titles already listed in the record file (<c>fileinfoLog</c>) are skipped. Each
/// successful download is recorded with <c>VideoSpiderTools.RecordFile(title, fileinfoLog)</c>.
/// </remarks>
/// <param name="url">Address of the list page to scrape.</param>
public static void Analyze(string url)
{
    // A missing record file simply means nothing has been downloaded yet.
    var fileinfos = File.Exists(fileinfoLog)
        ? File.ReadAllLines(fileinfoLog, Encoding.UTF8)
        : new string[0];

    var todayDir = Path.Combine(fileDir, DateTime.Now.ToString("yyyyMMdd"));
    if (!Directory.Exists(todayDir))
    {
        Directory.CreateDirectory(todayDir);
    }

    HtmlWeb web = new HtmlWeb();
    var doc = web.Load(url);
    var divNode = doc.DocumentNode.SelectSingleNode(@"//div[@class='wrapper-piclist']");
    if (divNode == null)
    {
        // SelectSingleNode yields null when the page has no matching container;
        // without this guard the Descendants call below would throw.
        Console.WriteLine("未找到 wrapper-piclist 节点,停止采集");
        return;
    }

    foreach (var liNode in divNode.Descendants("li"))
    {
        var aNode = liNode.Descendants("a").FirstOrDefault();
        if (aNode == null)
        {
            continue;
        }

        var title = aNode.GetAttributeValue("title", "").Trim();
        title = VideoSpiderTools.ReplaceQuote(title);
        if (string.IsNullOrEmpty(title))
        {
            // Untitled entries get a unique placeholder so they can still be recorded.
            title = Guid.NewGuid().ToString();
        }

        if (fileinfos.Contains(title))
        {
            Console.WriteLine(title + " 已存在");
            continue;
        }

        Console.WriteLine("获取标题:" + title);

        var href = aNode.GetAttributeValue("href", "").Trim();
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        // Drop the fragment, e.g. http://www.iqiyi.com/v_19rre87l8k.html#vfrm=2-4-0-1
        var childurl = href.Split('#')[0];

        if (VideoSpiderTools.YoutubedlDownload(childurl, todayDir))
        {
            VideoSpiderTools.RecordFile(title, fileinfoLog);
            Console.WriteLine(title + " 下载完成");
        }
        else
        {
            Console.WriteLine(title + " 下载失败");
        }
    }
}
/// <summary>
/// Prompts for a video URL on the console, picks the storage folder that matches the
/// site named in the URL, and downloads the video into a sub-folder named after
/// today's date.
/// </summary>
/// <remarks>
/// URLs containing <c>qq.com</c> are handed to <c>VideoSpiderTools.YouGetDownLoad</c>;
/// everything else goes to <c>VideoSpiderTools.YoutubedlDownload</c>. A trailing <c>@</c>
/// on the input sets the <c>cutEnd</c> flag passed to the downloader, and every <c>@</c>
/// is stripped from the URL before downloading.
/// </remarks>
public static void SpiderRun()
{
    Console.WriteLine("spider tools:youtube-dl,youget");
    Console.WriteLine("please input url;");

    // ReadLine returns null when the input stream is closed; Trim() on it would throw.
    var url = Console.ReadLine();
    if (string.IsNullOrWhiteSpace(url))
    {
        Console.WriteLine("url is empty, nothing to download");
        return;
    }

    url = url.Trim();

    // These checks are deliberately sequential: when several site names occur in the
    // URL, the last matching one decides the folder.
    var basePath = youtubeVideoPath;
    if (url.Contains("iqiyi.com"))
    {
        // NOTE(review): a backslash is appended for iqiyi URLs; because this happens
        // before the "@" check below, a trailing "@" marker is never detected for
        // iqiyi input. Kept as-is — confirm whether that is intended.
        url = url + "\\";
        basePath = iqiyiVideoPath;
    }
    if (url.Contains("bilibili.com"))
    {
        basePath = bilibiliVideoPath;
    }
    if (url.Contains("youku.com"))
    {
        basePath = youkuVideoPath;
    }
    if (url.Contains("qq.com"))
    {
        basePath = tencentVideoPath;
    }
    if (url.Contains("sohu.com"))
    {
        basePath = sohuVideoPath;
    }

    if (!Directory.Exists(basePath))
    {
        Directory.CreateDirectory(basePath);
    }

    var todayDir = Path.Combine(basePath, DateTime.Now.ToString("yyyyMMdd"));
    if (!Directory.Exists(todayDir))
    {
        Directory.CreateDirectory(todayDir);
    }

    var cutEnd = url.EndsWith("@");
    url = url.Replace("@", "").Trim();

    if (url.Contains("qq.com"))
    {
        VideoSpiderTools.YouGetDownLoad(url, todayDir, cutEnd);
    }
    else
    {
        VideoSpiderTools.YoutubedlDownload(url, todayDir, cutEnd);
    }
}
/// <summary>
/// Downloads every video listed on the page at <paramref name="url"/> into a folder
/// named after the page's author, skipping URLs already written to that folder's
/// <c>video_list.log</c>.
/// </summary>
/// <remarks>
/// The author name is read from the <c>li</c> with class <c>author-attribution</c>, or,
/// when that element is absent, from the <c>content</c> attribute of
/// <c>meta[name='title']</c>. If neither exists the method reports it and returns.
/// </remarks>
/// <param name="url">Page to collect video links from.</param>
public static void CollectYoutubeVideos(string url)
{
    Console.WriteLine("开始采集");
    var htmlContent = NetWorkHandle.GetHtmlContent(url).Item2;

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(htmlContent);

    string userName;
    var liNode = htmlDoc.DocumentNode.SelectSingleNode(@"//li[@class='author-attribution']");
    if (liNode is null)
    {
        var titleMetaNode = htmlDoc.DocumentNode.SelectSingleNode(@"//meta[@name='title']");
        if (titleMetaNode is null)
        {
            // Neither source of the author name exists; dereferencing the null node
            // would throw.
            Console.WriteLine("无法获取username,停止采集");
            return;
        }

        userName = titleMetaNode.GetAttributeValue("content", "");
    }
    else
    {
        userName = liNode.InnerText.Trim().ToLower();
    }

    userName = HttpUtility.HtmlDecode(userName);
    // NOTE(review): both replacements look identical in this view; the second one
    // possibly targeted a different whitespace character originally — confirm.
    userName = userName.Replace(" ", "_").Replace(" ", "_");

    if (!Directory.Exists(youtubeUserVideoPath))
    {
        Directory.CreateDirectory(youtubeUserVideoPath);
    }

    var currentUserPath = Path.Combine(youtubeUserVideoPath, userName);
    if (!Directory.Exists(currentUserPath))
    {
        Directory.CreateDirectory(currentUserPath);
    }

    var logPath = Path.Combine(currentUserPath, "video_list.log");
    if (!File.Exists(logPath))
    {
        File.WriteAllText(logPath, "", Encoding.UTF8);
    }

    // A "/user/<name>/video" URL is parsed by a dedicated helper.
    List<string> childUrls;
    if (Regex.IsMatch(url, "/user/[^/]+/video"))
    {
        childUrls = AnalyseVideoUrlListByUserVideoUrl(url);
    }
    else
    {
        childUrls = AnalyseVideoList(url);
    }

    // Hash set gives constant-time "already downloaded" checks for long logs.
    var existUrls = new HashSet<string>(File.ReadAllLines(logPath, Encoding.UTF8));
    foreach (var childUrl in childUrls)
    {
        Console.WriteLine("开始下载--" + childUrl);
        if (existUrls.Contains(childUrl))
        {
            continue;
        }

        VideoSpiderTools.YoutubedlDownload(childUrl, currentUserPath, false);
        LogHelper.WriteLogs(childUrl, logPath);
        // Pause between downloads.
        Thread.Sleep(2000);
    }
}
/// <summary>
/// Asks on the console for a page count and a minimum view count, then downloads the
/// videos returned by <c>Analyse(url, pageNum, viewCount)</c> into a folder named after
/// the user shown on the page at <paramref name="url"/>.
/// </summary>
/// <remarks>
/// URLs already present in the folder's <c>video_list.log</c> are skipped, and each
/// successful download is appended to it. Every download is given three minutes before
/// it is reported as timed out.
/// </remarks>
/// <param name="url">User page to collect videos from.</param>
private static void DownloadByUser(string url)
{
    Console.WriteLine("请输入采集的页数:");
    // TryParse instead of Convert.ToInt32 so a typo does not crash with FormatException.
    if (!int.TryParse(Console.ReadLine(), out var pageNum))
    {
        Console.WriteLine("页数输入无效,停止采集");
        return;
    }

    Console.WriteLine("请输入最低播放量:");
    if (!int.TryParse(Console.ReadLine(), out var viewCount))
    {
        Console.WriteLine("最低播放量输入无效,停止采集");
        return;
    }

    Console.WriteLine("开始采集");
    HtmlWeb web = new HtmlWeb();
    var doc = web.Load(url);
    var userNameNode = doc.DocumentNode.SelectSingleNode(@"//span[@id='userInfoNick']");
    if (userNameNode is null)
    {
        Console.WriteLine("无法获取username,停止采集");
        return;
    }

    var userName = userNameNode.InnerText.Trim();
    Console.WriteLine("user name:" + userName);

    var userVideoPath = Path.Combine(videoDir, userName);
    var logPath = Path.Combine(userVideoPath, "video_list.log");

    // Hash set gives constant-time "already downloaded" checks.
    var existVideos = new HashSet<string>();
    if (!Directory.Exists(userVideoPath))
    {
        Directory.CreateDirectory(userVideoPath);
    }
    else if (File.Exists(logPath))
    {
        existVideos = new HashSet<string>(File.ReadLines(logPath));
    }

    List<string> urls = Analyse(url, pageNum, viewCount);
    foreach (var childUrl in urls)
    {
        Console.WriteLine("下载--" + childUrl);
        try
        {
            if (existVideos.Contains(childUrl.Trim()))
            {
                continue;
            }

            var task = Task.Run(() =>
            {
                if (VideoSpiderTools.YouGetDownLoad(childUrl, userVideoPath, false))
                {
                    Console.WriteLine(childUrl + "--下载成功");
                    LogHelper.WriteLogs(childUrl.Trim(), logPath);
                }
                else
                {
                    Console.WriteLine(childUrl + "--下载失败");
                }
            });

            // The task is not cancelled on timeout; it keeps running in the background
            // while the loop moves on to the next URL.
            if (!task.Wait(TimeSpan.FromMinutes(3)))
            {
                Console.WriteLine(childUrl + "--超时退出,下载失败");
            }

            Thread.Sleep(2000);
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and continue with the next URL.
            Console.WriteLine(ex.Message);
        }
    }
}
/// <summary>
/// Collects the first link of every <c>li</c> under the <c>ul</c> with class
/// <c>figure_list</c> on the page at <paramref name="url"/> and downloads each one into
/// a folder named after today's date.
/// </summary>
/// <remarks>
/// Each link's <c>href</c> is prefixed with <c>host</c> to form the download URL. Every
/// download is given three minutes before it is reported as timed out.
/// </remarks>
/// <param name="url">List page to collect video links from.</param>
private static void DownloadByList(string url)
{
    var urls = new List<string>();
    Console.WriteLine("开始采集");

    // The page is fetched once; an earlier second fetch via NetWorkHandle whose
    // result was never used has been removed.
    HtmlWeb web = new HtmlWeb();
    var doc = web.Load(url);

    var ulNode = doc.DocumentNode.SelectSingleNode(@"//ul[@class='figure_list']");
    if (ulNode is null)
    {
        return;
    }

    var liNodes = ulNode.Descendants("li");
    if (liNodes is null)
    {
        return;
    }

    foreach (var liNode in liNodes)
    {
        var aNode = liNode.Descendants("a").FirstOrDefault();
        if (aNode is null)
        {
            continue;
        }

        var href = aNode.GetAttributeValue("href", "");
        if (string.IsNullOrWhiteSpace(href))
        {
            // Without an href the URL would be just the host, which is not a video page.
            continue;
        }

        var childUrl = host + href;
        Console.WriteLine(childUrl);
        urls.Add(childUrl);
    }

    var todayDir = Path.Combine(basePath, DateTime.Now.ToString("yyyyMMdd"));
    if (!Directory.Exists(todayDir))
    {
        Directory.CreateDirectory(todayDir);
    }

    foreach (var childUrl in urls)
    {
        Console.WriteLine("下载--" + childUrl);
        try
        {
            var task = Task.Run(() =>
            {
                if (VideoSpiderTools.YouGetDownLoad(childUrl, todayDir, false))
                {
                    Console.WriteLine(childUrl + "--下载成功");
                }
                else
                {
                    Console.WriteLine(childUrl + "--下载失败");
                }
            });

            // The task is not cancelled on timeout; it keeps running in the background
            // while the loop moves on to the next URL.
            if (!task.Wait(TimeSpan.FromMinutes(3)))
            {
                Console.WriteLine(childUrl + "--超时退出,下载失败");
            }

            Thread.Sleep(2000);
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and continue with the next URL.
            Console.WriteLine(ex.Message);
        }
    }
}
/// <summary>
/// Downloads the video, and the cover image when one is given, for every item returned
/// by <c>SingleSpider()</c> into <c>basePath</c>.
/// </summary>
/// <remarks>
/// Files are named after the item's title, or after the current timestamp when the
/// title is empty. Successful video downloads are recorded through
/// <c>VideoSpiderTools.RecordFile</c>; an exception raised while handling one item is
/// logged and recorded, and processing continues with the next item.
/// </remarks>
public static void DownLoadFiles()
{
    if (!Directory.Exists(basePath))
    {
        Directory.CreateDirectory(basePath);
    }

    if (!File.Exists(VideoInfoPath))
    {
        // File.Create returns an open FileStream; dispose it right away so the new
        // file is not left locked by this process.
        File.Create(VideoInfoPath).Dispose();
    }

    var results = SingleSpider();
    foreach (var item in results)
    {
        // Tolerate a missing title so one bad item cannot abort the whole batch.
        var title = (item.Title ?? "").Trim();
        try
        {
            if (string.IsNullOrEmpty(title))
            {
                title = DateTime.Now.ToString("yyyyMMddHHmmss");
            }

            if (!string.IsNullOrEmpty(item.ImgUrl))
            {
                Console.WriteLine($"开始下载--{item.ImgUrl}");
                var imgFilePath = Path.Combine(basePath, title + ".jpg");
                if (NetWorkHandle.DownFileMethod(item.ImgUrl, imgFilePath))
                {
                    Console.WriteLine($"下载--{item.ImgUrl}--成功");
                }
                else
                {
                    Console.WriteLine($"下载--{item.ImgUrl}--失败");
                }
            }

            var videoFilePath = Path.Combine(basePath, title + ".mp4");
            if (NetWorkHandle.DownFileMethod(item.videoUrl, videoFilePath))
            {
                Console.WriteLine($"下载--{item.videoUrl}--成功");
                VideoSpiderTools.RecordFile(title, recordFile: existsFileName, path: basePath);
            }
            else
            {
                Console.WriteLine($"下载--{item.videoUrl}--失败");
            }

            Console.WriteLine($"{title}--完成");
        }
        catch (Exception ex)
        {
            // Record the failure and carry on with the remaining items.
            Console.WriteLine("异常:" + title + " " + ex.Message);
            VideoSpiderTools.RecordFile(title + " @ " + "异常信息:" + ex.Message, exceptionFileName);
        }
    }
}