Exemplo n.º 1
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and builds one video URL for every
        /// <c>li</c> element found below the <c>ol</c> whose id is <c>playlist-autoscroll-list</c>.
        /// </summary>
        /// <param name="url">Address of the page to download and analyse.</param>
        /// <returns>
        /// The video URLs produced with <c>baseVideoUrlFormat</c>; empty when the page has no
        /// content or does not contain the playlist element.
        /// </returns>
        private static List <string> AnalyseVideoList(string url)
        {
            var childUrls   = new List <string>();
            var htmlContent = NetWorkHandle.GetHtmlContent(url).Item2;

            // Nothing to parse; also avoids handing a null string to LoadHtml.
            if (string.IsNullOrEmpty(htmlContent))
            {
                return(childUrls);
            }

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlContent);

            var olNode = htmlDoc.DocumentNode.SelectSingleNode(@"//ol[@id='playlist-autoscroll-list']");
            if (olNode == null)
            {
                return(childUrls);
            }

            foreach (var liNode in olNode.Descendants("li"))
            {
                // GetAttributeValue falls back to "" instead of throwing when the
                // attribute is missing, so unrelated <li> items are simply skipped.
                var videoId = liNode.GetAttributeValue("data-video-id", "");
                if (string.IsNullOrEmpty(videoId))
                {
                    continue;
                }

                var videoUrl = string.Format(baseVideoUrlFormat, videoId);
                Console.WriteLine(videoUrl);
                childUrls.Add(videoUrl);
            }
            return(childUrls);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads https://9gag.com/video, walks every <c>article</c> element and collects
        /// a <c>SpiderModel</c> (title, mp4 URL, poster URL, fresh Guid) for each article
        /// whose title is not already listed in the file at <c>VideoInfoPath</c>.
        /// </summary>
        /// <returns>
        /// The newly discovered videos. The list is empty (or partial) when the page cannot be
        /// fetched or parsed; failures are written to the console instead of being thrown.
        /// </returns>
        public static List <SpiderModel> SingleSpider()
        {
            var result = new List <SpiderModel>();

            try
            {
                Console.WriteLine("开始分析网页");

                var gagUrl      = "https://9gag.com/video";
                var htmlContent = NetWorkHandle.GetHtmlContent(gagUrl).Item2;
                if (string.IsNullOrEmpty(htmlContent))
                {
                    Console.WriteLine("网页分析完毕");
                    return(result);
                }

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(htmlContent);

                var articles = htmlDoc.DocumentNode.SelectNodes(@"//article");
                if (articles != null)
                {
                    // The known-titles file does not change while the page is analysed,
                    // so read it once instead of once per article.
                    var existingTitles = new HashSet <string>(File.ReadAllLines(VideoInfoPath, Encoding.UTF8));

                    foreach (var item in articles)
                    {
                        var titleNode = GetChildOrNull(item, 1);
                        if (titleNode == null)
                        {
                            continue;
                        }

                        var title = titleNode.InnerText.Trim();
                        if (existingTitles.Contains(title))
                        {
                            Console.WriteLine($"{title}--已存在");
                            continue;
                        }

                        // Positional path article -> [3] -> [1] -> [1] carries "data-mp4";
                        // its child [1] carries "poster". A missing level skips the article
                        // rather than aborting the whole crawl.
                        var videoNode = GetChildOrNull(GetChildOrNull(GetChildOrNull(item, 3), 1), 1);
                        if (videoNode == null)
                        {
                            continue;
                        }

                        var videoUrl = videoNode.GetAttributeValue("data-mp4", "");
                        if (string.IsNullOrEmpty(videoUrl))
                        {
                            continue;
                        }

                        var imgUrl = GetChildOrNull(videoNode, 1)?.GetAttributeValue("poster", "") ?? "";

                        var spidermodel = new SpiderModel();
                        spidermodel.Title    = title;
                        spidermodel.videoUrl = videoUrl;
                        spidermodel.ImgUrl   = imgUrl;
                        spidermodel.Id       = Guid.NewGuid();
                        result.Add(spidermodel);
                        Console.WriteLine($"{title}--添加成功");
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort crawl: keep what was collected, but report why it stopped.
                Console.WriteLine("异常:" + ex.Message);
                return(result);
            }
            Console.WriteLine("网页分析完毕");
            return(result);
        }

        /// <summary>
        /// Returns the child of <paramref name="node"/> at <paramref name="index"/>, or
        /// <c>null</c> when the node is null or has no child at that position.
        /// </summary>
        private static HtmlNode GetChildOrNull(HtmlNode node, int index)
        {
            if (node == null || index < 0 || index >= node.ChildNodes.Count)
            {
                return(null);
            }
            return(node.ChildNodes[index]);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Fetches <paramref name="url"/> and returns one absolute YouTube URL for each anchor
        /// matching the video-tile link class list, built by prefixing the anchor's
        /// <c>href</c> value with <c>https://www.youtube.com/</c>.
        /// </summary>
        /// <param name="url">Address of the page to download and scan.</param>
        /// <returns>
        /// The collected URLs; empty when the page has no content or no anchor matches.
        /// </returns>
        private static List <string> AnalyseVideoUrlListByUserVideoUrl(string url)
        {
            const string siteRoot  = "https://www.youtube.com/";
            const string linkXPath = @"//a[@class='yt-uix-sessionlink yt-uix-tile-link  spf-link  yt-ui-ellipsis yt-ui-ellipsis-2']";

            var collected = new List <string>();
            var pageHtml  = NetWorkHandle.GetHtmlContent(url).Item2;

            if (!string.IsNullOrEmpty(pageHtml))
            {
                var document = new HtmlDocument();
                document.LoadHtml(pageHtml);

                var anchors = document.DocumentNode.SelectNodes(linkXPath);
                if (anchors != null)
                {
                    foreach (var anchor in anchors)
                    {
                        // A missing href falls back to "", yielding just the site root.
                        var href = anchor.GetAttributeValue("href", "");
                        collected.Add(siteRoot + href);
                    }
                }
            }

            return(collected);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Extracts the member id from a <c>https://space.bilibili.com/{id}/...</c> address,
        /// requests the first page (30 entries, ordered by publish date) of that member's
        /// submitted videos and turns each entry's <c>aid</c> into a video page URL.
        /// </summary>
        /// <param name="url">Space URL, e.g. <c>https://space.bilibili.com/25911961/video</c>.</param>
        /// <returns>
        /// URLs of the form <c>https://www.bilibili.com/video/av{aid}</c>; empty when the
        /// response is empty or carries no <c>data.vlist</c> content.
        /// </returns>
        private static List <string> Analyse(string url)
        {
            var urls = new List <string>();
            var code = url.Replace("https://space.bilibili.com/", "").Split('/')[0];
            //https://space.bilibili.com/ajax/member/getSubmitVideos?mid=25911961&pagesize=30&tid=0&page=1&keyword=&order=pubdate
            var jsonUrl     = $"https://space.bilibili.com/ajax/member/getSubmitVideos?mid={code}&pagesize=30&tid=0&page=1&keyword=&order=pubdate";
            var jsonContent = NetWorkHandle.GetHtmlContent(jsonUrl).Item2;

            if (string.IsNullOrEmpty(jsonContent))
            {
                return(urls);
            }

            // The payload is parsed as-is: the JSON reader decodes \uXXXX, \" and \\ itself.
            // Running Regex.Unescape over it first would turn \" into a bare quote and
            // corrupt any string value that contains one.
            var root   = JsonConvert.DeserializeObject(jsonContent) as Newtonsoft.Json.Linq.JObject;
            var vlists = root?.SelectToken("data.vlist");
            if (vlists == null)
            {
                return(urls);
            }

            foreach (var entry in vlists)
            {
                var video = entry as Newtonsoft.Json.Linq.JObject;
                if (video == null)
                {
                    continue;
                }

                var aid = (string)video["aid"];
                if (string.IsNullOrEmpty(aid))
                {
                    continue;
                }
                urls.Add("https://www.bilibili.com/video/av" + aid);
            }

            return(urls);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Downloads every video reachable from a YouTube user or playlist page into a
        /// per-user folder below <c>youtubeUserVideoPath</c>, recording each finished URL in
        /// that folder's <c>video_list.log</c> so later runs skip it.
        /// </summary>
        /// <param name="url">
        /// Page to collect from. URLs matching <c>/user/[^/]+/video</c> are analysed as a user
        /// video page, anything else as a playlist page.
        /// </param>
        public static void CollectYoutubeVideos(string url)
        {
            Console.WriteLine("开始采集");
            var htmlContent = NetWorkHandle.GetHtmlContent(url).Item2;

            if (string.IsNullOrEmpty(htmlContent))
            {
                Console.WriteLine("采集失败--" + url);
                return;
            }

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlContent);

            // Folder name: author attribution when present, else the <meta name='title'> content.
            string userName;
            var    liNode = htmlDoc.DocumentNode.SelectSingleNode(@"//li[@class='author-attribution']");

            if (liNode is null)
            {
                var titleMetaNode = htmlDoc.DocumentNode.SelectSingleNode(@"//meta[@name='title']");
                userName = titleMetaNode?.GetAttributeValue("content", "") ?? "";
            }
            else
            {
                userName = liNode.InnerText.Trim().ToLower();
            }
            userName = HttpUtility.HtmlDecode(userName) ?? "";

            // NOTE(review): the original chained two visually identical Replace(" ", "_")
            // calls; the second is assumed to have targeted a non-breaking space (as produced
            // by decoding &nbsp;), so it is spelled out explicitly here — confirm.
            userName = userName.Replace(" ", "_").Replace("\u00A0", "_");

            // Page-supplied text may contain characters that are illegal in a folder name.
            foreach (var invalidChar in Path.GetInvalidFileNameChars())
            {
                userName = userName.Replace(invalidChar, '_');
            }

            // CreateDirectory creates missing parents and is a no-op for existing folders.
            var currentUserPath = Path.Combine(youtubeUserVideoPath, userName);
            Directory.CreateDirectory(currentUserPath);

            var logPath = Path.Combine(currentUserPath, "video_list.log");
            if (!File.Exists(logPath))
            {
                File.WriteAllText(logPath, "", Encoding.UTF8);
            }

            var childUrls = Regex.IsMatch(url, "/user/[^/]+/video")
                            ? AnalyseVideoUrlListByUserVideoUrl(url)
                            : AnalyseVideoList(url);

            var finishedUrls = new HashSet <string>(File.ReadAllLines(logPath, Encoding.UTF8));

            foreach (var childUrl in childUrls)
            {
                // Skip before announcing, so the console only reports real downloads.
                if (finishedUrls.Contains(childUrl))
                {
                    continue;
                }
                Console.WriteLine("开始下载--" + childUrl);
                VideoSpiderTools.YoutubedlDownload(childUrl, currentUserPath, false);
                LogHelper.WriteLogs(childUrl, logPath);
                finishedUrls.Add(childUrl);     // a URL listed twice on the page is fetched once
                Thread.Sleep(2000);
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Reads up to <paramref name="pageNum"/> pages of a v.qq.com channel's video list and
        /// returns the URLs of the videos whose play count reaches <paramref name="lowViewCount"/>.
        /// </summary>
        /// <param name="url">Channel URL of the form <c>.../vplus/{id}/videos</c>.</param>
        /// <param name="pageNum">Number of pages to request, starting from page 1.</param>
        /// <param name="lowViewCount">Minimum play count a video needs to be included.</param>
        /// <returns>
        /// The matching video URLs. Paging stops early at the first page without any
        /// <c>videolst</c> entries; an unrecognised <paramref name="url"/> yields an empty list.
        /// </returns>
        private static List <string> Analyse(string url, int pageNum, int lowViewCount)
        {
            if (!Directory.Exists(videoDir))
            {
                Directory.CreateDirectory(videoDir);
            }

            var urls = new List <string>();
            if (string.IsNullOrEmpty(url))
            {
                return(urls);
            }

            //url = "http://v.qq.com/vplus/cb5be02aeda6adbbbac790ee1028a77e/videos";
            //http://c.v.qq.com/vchannelinfo?otype=json&uin=cb5be02aeda6adbbbac790ee1028a77e&qm=1&pagenum=3&num=24
            const string idPrefix = "vplus/";
            const string idSuffix = "/videos";
            int          idStart  = url.IndexOf(idPrefix, StringComparison.Ordinal);
            if (idStart < 0)
            {
                return(urls);
            }
            idStart += idPrefix.Length;
            int idEnd = url.IndexOf(idSuffix, idStart, StringComparison.Ordinal);
            if (idEnd < 0)
            {
                return(urls);
            }
            var currenId = url.Substring(idStart, idEnd - idStart);

            for (int i = 1; i < pageNum + 1; i++)
            {
                var currentUrl = string.Format(formatUrl, currenId, i.ToString());

                var content = NetWorkHandle.GetHtmlContent(currentUrl).Item2;
                if (string.IsNullOrEmpty(content))
                {
                    continue;
                }

                // Strip the "QZOutputJson=" assignment wrapper. Only a trailing ';' is
                // removed, so a payload without one keeps its closing brace.
                content = content.Trim().Replace("QZOutputJson=", "").TrimEnd(';');

                var root     = JsonConvert.DeserializeObject(content) as Newtonsoft.Json.Linq.JObject;
                var videolst = root == null ? null : root["videolst"];
                if (videolst == null || !videolst.HasValues)
                {
                    return(urls);
                }

                foreach (var entry in videolst)
                {
                    var video = entry as Newtonsoft.Json.Linq.JObject;
                    if (video == null)
                    {
                        continue;
                    }

                    var childUrl = (string)video["url"];
                    Console.WriteLine(childUrl);

                    var playCountToken = video["play_count"];
                    var playCount      = ParsePlayCount(playCountToken == null ? null : playCountToken.ToString());

                    if (playCount < lowViewCount || string.IsNullOrEmpty(childUrl))
                    {
                        continue;
                    }
                    urls.Add(childUrl);
                }
                Thread.Sleep(2000);
            }
            return(urls);
        }

        /// <summary>
        /// Converts a play-count string such as <c>"532"</c> or <c>"1.6万"</c> (万 = ×10000)
        /// to an integer; text that cannot be parsed counts as 0.
        /// </summary>
        private static int ParsePlayCount(string text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                return(0);
            }
            text = text.Trim();

            // The feed uses '.' as decimal separator regardless of the machine's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            if (text.Contains("万"))
            {
                double tenThousands;
                if (!double.TryParse(text.Replace("万", ""), System.Globalization.NumberStyles.Float, invariant, out tenThousands))
                {
                    return(0);
                }
                // Round instead of truncating so binary floating-point error
                // (e.g. 10999.999...) cannot drop a unit.
                double total = Math.Round(tenThousands * 10000);
                return(total >= int.MaxValue ? int.MaxValue : (int)total);
            }

            int count;
            return(int.TryParse(text, System.Globalization.NumberStyles.Integer, invariant, out count) ? count : 0);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Loads the page at <paramref name="url"/>, collects the first link of every
        /// <c>li</c> under <c>ul.figure_list</c>, and downloads each linked video into a
        /// folder named after today's date (<c>yyyyMMdd</c>) below <c>basePath</c>.
        /// </summary>
        /// <param name="url">Address of the list page to scan.</param>
        private static void DownloadByList(string url)
        {
            var urls = new List <string>();

            Console.WriteLine("开始采集");
            // The page is fetched once; the former second request's result was never used.
            var web    = new HtmlWeb();
            var doc    = web.Load(url);
            var ulNode = doc.DocumentNode.SelectSingleNode(@"//ul[@class='figure_list']");

            if (ulNode is null)
            {
                return;
            }

            foreach (var liNode in ulNode.Descendants("li"))
            {
                var aNode = liNode.Descendants("a").FirstOrDefault();
                if (aNode is null)
                {
                    continue;
                }
                var href = aNode.GetAttributeValue("href", "");
                if (string.IsNullOrEmpty(href))
                {
                    // Without a target, host + href would just be the site root.
                    continue;
                }
                var childUrl = host + href;
                Console.WriteLine(childUrl);
                urls.Add(childUrl);
            }

            var todayDir = Path.Combine(basePath, DateTime.Now.ToString("yyyyMMdd"));
            Directory.CreateDirectory(todayDir);    // no-op when the folder already exists

            foreach (var childUrl in urls)
            {
                Console.WriteLine("下载--" + childUrl);
                try
                {
                    var task = Task.Run(() => {
                        if (VideoSpiderTools.YouGetDownLoad(childUrl, todayDir, false))
                        {
                            Console.WriteLine(childUrl + "--下载成功");
                            //LogHelper.WriteLogs(childUrl.Trim(), logPath);
                        }
                        else
                        {
                            Console.WriteLine(childUrl + "--下载失败");
                        }
                    });

                    // Wait only stops waiting: on timeout the task itself is not cancelled
                    // and may still be running when the next download starts.
                    if (!task.Wait(TimeSpan.FromMinutes(3)))
                    {
                        Console.WriteLine(childUrl + "--超时退出,下载失败");
                    }
                    Thread.Sleep(2000);
                }
                catch (Exception ex)
                {
                    // Wait wraps task failures in AggregateException; report the root cause.
                    Console.WriteLine(ex.GetBaseException().Message);
                }
            }
        }
Exemplo n.º 8
0
 /// <summary>
 /// Downloads <paramref name="url"/> to <paramref name="saveFilePath"/> by delegating to
 /// <c>NetWorkHandle.DownFileMethod</c>. Virtual so derived types can substitute their own
 /// download strategy.
 /// </summary>
 /// <param name="url">Address of the file to download.</param>
 /// <param name="saveFilePath">Path the downloaded file is written to.</param>
 /// <returns>Whatever <c>NetWorkHandle.DownFileMethod</c> reports for the download.</returns>
 public virtual bool DownloadFile(string url, string saveFilePath)
 {
     var succeeded = NetWorkHandle.DownFileMethod(url, saveFilePath);

     return(succeeded);
 }
Exemplo n.º 9
0
        /// <summary>
        /// Runs <c>SingleSpider</c> and, for every video it returns, downloads the poster image
        /// (when present) and the mp4 into <c>basePath</c>, naming both files after the video
        /// title. Successfully downloaded titles are recorded via
        /// <c>VideoSpiderTools.RecordFile</c>; per-video failures are logged and recorded
        /// without stopping the remaining downloads.
        /// </summary>
        public static void DownLoadFiles()
        {
            if (!Directory.Exists(basePath))
            {
                Directory.CreateDirectory(basePath);
            }
            if (!File.Exists(VideoInfoPath))
            {
                // File.Create returns an open FileStream; dispose it right away so the file
                // is not left locked for the ReadAllLines call inside SingleSpider.
                File.Create(VideoInfoPath).Dispose();
            }
            var results = SingleSpider();

            foreach (var item in results)
            {
                var title = (item.Title ?? string.Empty).Trim();
                try
                {
                    if (string.IsNullOrEmpty(title))
                    {
                        title = DateTime.Now.ToString("yyyyMMddHHmmss");
                    }

                    // Titles come from page text and may hold characters that are illegal in
                    // file names; only the on-disk name is sanitized, the recorded title is not.
                    var fileStem = title;
                    foreach (var invalidChar in Path.GetInvalidFileNameChars())
                    {
                        fileStem = fileStem.Replace(invalidChar, '_');
                    }

                    if (!string.IsNullOrEmpty(item.ImgUrl))
                    {
                        Console.WriteLine($"开始下载--{item.ImgUrl}");
                        var imgFilePath = Path.Combine(basePath, fileStem + ".jpg");
                        if (NetWorkHandle.DownFileMethod(item.ImgUrl, imgFilePath))
                        {
                            Console.WriteLine($"下载--{item.ImgUrl}--成功");
                        }
                        else
                        {
                            Console.WriteLine($"下载--{item.ImgUrl}--失败");
                        }
                    }

                    var videoFilePath = Path.Combine(basePath, fileStem + ".mp4");

                    if (NetWorkHandle.DownFileMethod(item.videoUrl, videoFilePath))
                    {
                        Console.WriteLine($"下载--{item.videoUrl}--成功");
                        //RecordFile(id, RecordFileName: existsFileName, path: basePath);
                        VideoSpiderTools.RecordFile(title, recordFile: existsFileName, path: basePath);
                    }
                    else
                    {
                        Console.WriteLine($"下载--{item.videoUrl}--失败");
                    }
                    Console.WriteLine($"{title}--完成");
                }
                catch (Exception ex)
                {
                    Console.WriteLine("异常:" + title + "  " + ex.Message);
                    VideoSpiderTools.RecordFile(title + " @ " + "异常信息:" + ex.Message, exceptionFileName);
                }
            }
        }