예제 #1
0
        /// <summary>
        /// Scrapes the list page at <paramref name="url"/> and downloads every video
        /// linked from its picture list into a directory named after today's date.
        /// </summary>
        /// <remarks>
        /// The page must contain a <c>div</c> with class <c>wrapper-piclist</c>; the first
        /// anchor of each <c>li</c> below it supplies the title and the link. Titles already
        /// listed in the record file are skipped, and each successful download is recorded
        /// through <c>VideoSpiderTools.RecordFile</c>.
        /// </remarks>
        /// <param name="url">Address of the list page to scrape.</param>
        public static void Analyze(string url)
        {
            // A missing record file simply means nothing has been downloaded yet;
            // File.ReadAllLines would throw FileNotFoundException in that case.
            var fileinfos = File.Exists(fileinfoLog)
                ? File.ReadAllLines(fileinfoLog, Encoding.UTF8)
                : new string[0];

            // CreateDirectory is a no-op when the directory already exists.
            var todayDir = Path.Combine(fileDir, DateTime.Now.ToString("yyyyMMdd"));
            Directory.CreateDirectory(todayDir);

            HtmlWeb web     = new HtmlWeb();
            var     doc     = web.Load(url);
            var     divNode = doc.DocumentNode.SelectSingleNode(@"//div[@class='wrapper-piclist']");

            // SelectSingleNode returns null when nothing matches (page layout changed,
            // error page, ...); stop instead of failing with a NullReferenceException.
            if (divNode == null)
            {
                Console.WriteLine("未找到视频列表(wrapper-piclist),停止采集");
                return;
            }

            foreach (var liNode in divNode.Descendants("li"))
            {
                var aNode = liNode.Descendants("a").FirstOrDefault();
                if (aNode == null)
                {
                    continue;
                }

                var title = aNode.GetAttributeValue("title", "").Trim();
                title = VideoSpiderTools.ReplaceQuote(title);
                if (string.IsNullOrEmpty(title))
                {
                    // Untitled entries still need a unique name for the record file.
                    title = Guid.NewGuid().ToString();
                }
                if (fileinfos.Contains(title))
                {
                    Console.WriteLine(title + " 已存在");
                    continue;
                }

                Console.WriteLine("获取标题:" + title);
                var href = aNode.GetAttributeValue("href", "").Trim();
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // Drop the fragment, e.g. http://www.iqiyi.com/v_19rre87l8k.html#vfrm=2-4-0-1
                var childurl = href.Split('#')[0];
                if (VideoSpiderTools.YoutubedlDownload(childurl, todayDir))
                {
                    VideoSpiderTools.RecordFile(title, fileinfoLog);
                    Console.WriteLine(title + " 下载完成");
                }
                else
                {
                    Console.WriteLine(title + " 下载失败");
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Prompts for a video URL on the console and downloads it into a per-site,
        /// per-day directory.
        /// </summary>
        /// <remarks>
        /// The root directory is chosen from the site name contained in the URL and falls
        /// back to <c>youtubeVideoPath</c>. A trailing <c>@</c> on the input is a flag: it is
        /// removed from the URL and forwarded to the downloader as <c>cutEnd = true</c>.
        /// URLs containing <c>qq.com</c> are handled by <c>YouGetDownLoad</c>, all others by
        /// <c>YoutubedlDownload</c>.
        /// </remarks>
        public static void SpiderRun()
        {
            Console.WriteLine("spider tools:youtube-dl,youget");
            Console.WriteLine("please input url;");
            var input = Console.ReadLine();

            // ReadLine returns null at end of input (e.g. redirected stdin); an empty
            // URL cannot be downloaded either, so stop early in both cases.
            if (string.IsNullOrWhiteSpace(input))
            {
                Console.WriteLine("url is empty");
                return;
            }
            var url = input.Trim();

            // Detect the trailing '@' flag BEFORE the URL is modified below. The original
            // appended a backslash to iqiyi URLs first, so the flag was never seen for them.
            var cutEnd = url.EndsWith("@", StringComparison.Ordinal);
            url = url.Replace("@", "").Trim();

            // Checks run in sequence; when several site names occur in the URL the
            // last matching one decides the target directory.
            var basePath = youtubeVideoPath;
            var isIqiyi  = url.Contains("iqiyi.com");

            if (isIqiyi)
            {
                basePath = iqiyiVideoPath;
            }
            if (url.Contains("bilibili.com"))
            {
                basePath = bilibiliVideoPath;
            }
            if (url.Contains("youku.com"))
            {
                basePath = youkuVideoPath;
            }
            if (url.Contains("qq.com"))
            {
                basePath = tencentVideoPath;
            }
            if (url.Contains("sohu.com"))
            {
                basePath = sohuVideoPath;
            }
            if (isIqiyi)
            {
                // iqiyi URLs are passed to the downloader with a trailing backslash
                // (kept from the original; NOTE(review): the reason is not visible here).
                url = url + "\\";
            }

            // CreateDirectory creates missing parents and is a no-op for existing ones.
            var todayDir = Path.Combine(basePath, DateTime.Now.ToString("yyyyMMdd"));
            Directory.CreateDirectory(todayDir);

            if (url.Contains("qq.com"))
            {
                VideoSpiderTools.YouGetDownLoad(url, todayDir, cutEnd);
            }
            else
            {
                VideoSpiderTools.YoutubedlDownload(url, todayDir, cutEnd);
            }
        }
예제 #3
0
        /// <summary>
        /// Downloads the videos listed on the page at <paramref name="url"/> into a
        /// directory named after the page's author, skipping URLs already present in that
        /// directory's <c>video_list.log</c>.
        /// </summary>
        /// <remarks>
        /// The author name is taken from the <c>li.author-attribution</c> element or, when
        /// that is absent, from the <c>content</c> attribute of <c>meta[name='title']</c>.
        /// URLs matching <c>/user/[^/]+/video</c> are expanded with
        /// <c>AnalyseVideoUrlListByUserVideoUrl</c>, all others with <c>AnalyseVideoList</c>.
        /// </remarks>
        /// <param name="url">Address of the user or list page to collect from.</param>
        public static void CollectYoutubeVideos(string url)
        {
            Console.WriteLine("开始采集");
            var htmlContent = NetWorkHandle.GetHtmlContent(url).Item2;

            // HtmlDocument.LoadHtml rejects null, and an empty page has nothing to parse.
            if (string.IsNullOrEmpty(htmlContent))
            {
                Console.WriteLine("无法获取页面内容,停止采集");
                return;
            }

            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlContent);

            string userName;
            var    liNode = htmlDoc.DocumentNode.SelectSingleNode(@"//li[@class='author-attribution']");

            if (liNode is null)
            {
                var titleMetaNode = htmlDoc.DocumentNode.SelectSingleNode(@"//meta[@name='title']");

                // Neither source for the name exists: stop instead of dereferencing null.
                if (titleMetaNode is null)
                {
                    Console.WriteLine("无法获取username,停止采集");
                    return;
                }
                userName = titleMetaNode.GetAttributeValue("content", "");
            }
            else
            {
                userName = liNode.InnerText.Trim().ToLower();
            }

            userName = HttpUtility.HtmlDecode(userName);
            // The original replaced the plain space twice; the second replacement was
            // presumably meant for the non-breaking space that HtmlDecode produces from
            // "&nbsp;", so handle U+00A0 explicitly.
            userName = userName.Replace(" ", "_").Replace("\u00A0", "_");
            // The name becomes a directory name, so neutralise characters the file
            // system rejects. Names that were valid before are left unchanged.
            foreach (var invalidChar in Path.GetInvalidFileNameChars())
            {
                userName = userName.Replace(invalidChar, '_');
            }

            // CreateDirectory also creates youtubeUserVideoPath when it is missing.
            var currentUserPath = Path.Combine(youtubeUserVideoPath, userName);
            Directory.CreateDirectory(currentUserPath);

            var logPath = Path.Combine(currentUserPath, "video_list.log");
            if (!File.Exists(logPath))
            {
                File.WriteAllText(logPath, "", Encoding.UTF8);
            }

            List <string> childUrls;
            if (new Regex("/user/[^/]+/video").IsMatch(url))
            {
                childUrls = AnalyseVideoUrlListByUserVideoUrl(url);
            }
            else
            {
                childUrls = AnalyseVideoList(url);
            }

            // A set gives constant-time lookups and lets URLs handled during this run
            // be remembered, so a URL listed twice is not downloaded twice.
            var existUrls = new HashSet <string>(File.ReadAllLines(logPath, Encoding.UTF8));

            foreach (var childUrl in childUrls)
            {
                Console.WriteLine("开始下载--" + childUrl);
                if (existUrls.Contains(childUrl))
                {
                    continue;
                }
                // NOTE(review): the URL is logged whether or not the download succeeded,
                // so failed downloads are never retried - confirm this is intended.
                VideoSpiderTools.YoutubedlDownload(childUrl, currentUserPath, false);
                LogHelper.WriteLogs(childUrl, logPath);
                existUrls.Add(childUrl);
                Thread.Sleep(2000);
            }
        }
예제 #4
0
        /// <summary>
        /// Interactively downloads a user's videos: asks for the number of pages to scan
        /// and the minimum view count, resolves the user name from the page at
        /// <paramref name="url"/>, and downloads every URL returned by <c>Analyse</c> that
        /// is not yet listed in the user's <c>video_list.log</c>.
        /// </summary>
        /// <remarks>
        /// Each download runs on a worker task and is waited on for at most three minutes.
        /// Successful downloads are appended to the log.
        /// </remarks>
        /// <param name="url">Address of the user's page.</param>
        private static void DownloadByUser(string url)
        {
            // Convert.ToInt32 throws FormatException/OverflowException on bad console
            // input; TryParse lets us report the problem and stop cleanly instead.
            Console.WriteLine("请输入采集的页数:");
            if (!int.TryParse(Console.ReadLine(), out var pageNum))
            {
                Console.WriteLine("页数输入无效,停止采集");
                return;
            }

            Console.WriteLine("请输入最低播放量:");
            if (!int.TryParse(Console.ReadLine(), out var viewCount))
            {
                Console.WriteLine("播放量输入无效,停止采集");
                return;
            }

            Console.WriteLine("开始采集");
            HtmlWeb web          = new HtmlWeb();
            var     doc          = web.Load(url);
            var     userNameNode = doc.DocumentNode.SelectSingleNode(@"//span[@id='userInfoNick']");

            if (userNameNode is null)
            {
                Console.WriteLine("无法获取username,停止采集");
                return;
            }
            var userName = userNameNode.InnerText.Trim();

            Console.WriteLine("user name:" + userName);

            // The name is used as a directory name; replace characters the file system
            // rejects. Names that were valid before are left unchanged.
            var userDirName = userName;
            foreach (var invalidChar in Path.GetInvalidFileNameChars())
            {
                userDirName = userDirName.Replace(invalidChar, '_');
            }

            var userVideoPath = Path.Combine(videoDir, userDirName);
            var logPath       = Path.Combine(userVideoPath, "video_list.log");

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(userVideoPath);

            // Set of URLs recorded by earlier runs (constant-time lookups).
            var existVideos = File.Exists(logPath)
                ? new HashSet <string>(File.ReadLines(logPath))
                : new HashSet <string>();

            List <string> urls = Analyse(url, pageNum, viewCount);

            foreach (var childUrl in urls)
            {
                Console.WriteLine("下载--" + childUrl);
                try
                {
                    if (existVideos.Contains(childUrl.Trim()))
                    {
                        continue;
                    }

                    var task = Task.Run(() => {
                        if (VideoSpiderTools.YouGetDownLoad(childUrl, userVideoPath, false))
                        {
                            Console.WriteLine(childUrl + "--下载成功");
                            LogHelper.WriteLogs(childUrl.Trim(), logPath);
                        }
                        else
                        {
                            Console.WriteLine(childUrl + "--下载失败");
                        }
                    });
                    // NOTE(review): on timeout the task is only abandoned, not cancelled;
                    // it keeps running in the background and may still log success later.
                    if (!task.Wait(TimeSpan.FromMinutes(3)))
                    {
                        Console.WriteLine(childUrl + "--超时退出,下载失败");
                    }
                    Thread.Sleep(2000);
                }
                catch (AggregateException ex)
                {
                    // Task.Wait wraps failures; print the underlying cause rather than
                    // the generic "One or more errors occurred" message.
                    Console.WriteLine(ex.GetBaseException().Message);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Downloads every video linked from the <c>ul.figure_list</c> element of the page
        /// at <paramref name="url"/> into a directory named after today's date.
        /// </summary>
        /// <remarks>
        /// The first anchor of each <c>li</c> supplies the link. Relative links are prefixed
        /// with <c>host</c>. Each download runs on a worker task and is waited on for at
        /// most three minutes.
        /// </remarks>
        /// <param name="url">Address of the list page.</param>
        private static void DownloadByList(string url)
        {
            var urls = new List <string>();

            Console.WriteLine("开始采集");
            // The original additionally fetched the page through NetWorkHandle into two
            // unused locals; that redundant second request has been removed.
            HtmlWeb web    = new HtmlWeb();
            var     doc    = web.Load(url);
            var     ulNode = doc.DocumentNode.SelectSingleNode(@"//ul[@class='figure_list']");

            if (ulNode is null)
            {
                return;
            }
            foreach (var liNode in ulNode.Descendants("li"))
            {
                var aNode = liNode.Descendants("a").FirstOrDefault();
                if (aNode is null)
                {
                    continue;
                }
                var href = aNode.GetAttributeValue("href", "").Trim();
                if (string.IsNullOrEmpty(href))
                {
                    // Without a target the "URL" would be just the host; skip it.
                    continue;
                }
                // Only relative links need the host prefix; absolute ones are used as-is.
                var childUrl = href.StartsWith("http", StringComparison.OrdinalIgnoreCase)
                    ? href
                    : host + href;
                Console.WriteLine(childUrl);
                urls.Add(childUrl);
            }

            // CreateDirectory is a no-op when the directory already exists.
            var todayDir = Path.Combine(basePath, DateTime.Now.ToString("yyyyMMdd"));
            Directory.CreateDirectory(todayDir);

            foreach (var childUrl in urls)
            {
                Console.WriteLine("下载--" + childUrl);
                try
                {
                    var task = Task.Run(() => {
                        if (VideoSpiderTools.YouGetDownLoad(childUrl, todayDir, false))
                        {
                            Console.WriteLine(childUrl + "--下载成功");
                        }
                        else
                        {
                            Console.WriteLine(childUrl + "--下载失败");
                        }
                    });
                    // NOTE(review): on timeout the task is only abandoned, not cancelled;
                    // it keeps running in the background.
                    if (!task.Wait(TimeSpan.FromMinutes(3)))
                    {
                        Console.WriteLine(childUrl + "--超时退出,下载失败");
                    }
                    Thread.Sleep(2000);
                }
                catch (AggregateException ex)
                {
                    // Task.Wait wraps failures; print the underlying cause rather than
                    // the generic "One or more errors occurred" message.
                    Console.WriteLine(ex.GetBaseException().Message);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Downloads the cover image and the video of every item returned by
        /// <c>SingleSpider</c> into <c>basePath</c>.
        /// </summary>
        /// <remarks>
        /// Files are named after the item's title, or after the current timestamp when the
        /// title is empty. Successfully downloaded videos are recorded through
        /// <c>VideoSpiderTools.RecordFile</c>. A failure on one item is written to the
        /// exception record and does not stop the remaining items.
        /// </remarks>
        public static void DownLoadFiles()
        {
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(basePath);

            if (!File.Exists(VideoInfoPath))
            {
                // File.Create returns an open FileStream; dispose it right away so the
                // handle is not leaked and the new file is not left locked.
                File.Create(VideoInfoPath).Dispose();
            }
            var results = SingleSpider();

            foreach (var item in results)
            {
                // Null-safe: a missing title must not abort the whole batch before the
                // per-item try block is even entered.
                var title = (item.Title ?? string.Empty).Trim();
                try
                {
                    if (string.IsNullOrEmpty(title))
                    {
                        title = DateTime.Now.ToString("yyyyMMddHHmmss");
                    }

                    if (!string.IsNullOrEmpty(item.ImgUrl))
                    {
                        Console.WriteLine($"开始下载--{item.ImgUrl}");
                        var imgFilePath = Path.Combine(basePath, title + ".jpg");
                        if (NetWorkHandle.DownFileMethod(item.ImgUrl, imgFilePath))
                        {
                            Console.WriteLine($"下载--{item.ImgUrl}--成功");
                        }
                        else
                        {
                            Console.WriteLine($"下载--{item.ImgUrl}--失败");
                        }
                    }

                    var videoFilePath = Path.Combine(basePath, title + ".mp4");

                    if (NetWorkHandle.DownFileMethod(item.videoUrl, videoFilePath))
                    {
                        Console.WriteLine($"下载--{item.videoUrl}--成功");
                        VideoSpiderTools.RecordFile(title, recordFile: existsFileName, path: basePath);
                    }
                    else
                    {
                        Console.WriteLine($"下载--{item.videoUrl}--失败");
                    }
                    Console.WriteLine($"{title}--完成");
                }
                catch (Exception ex)
                {
                    Console.WriteLine("异常:" + title + "  " + ex.Message);
                    VideoSpiderTools.RecordFile(title + " @ " + "异常信息:" + ex.Message, exceptionFileName);
                }
            }
        }